kunhunjon committed on Nov 21, 2025

Commit

bdbfdea

verified ·

1 Parent(s): 5784554

Upload ChessLM Qwen3 Neuron model in AWS format structure

Browse files

Files changed (34) hide show

.gitattributes +8 -0
README.md +176 -0
WEIGHTS_README.md +36 -0
added_tokens.json +28 -0
config.json +69 -0
context_encoding_model/_tp0_bk0/command.txt +1 -0
context_encoding_model/_tp0_bk0/compile_flags.MODULE_e80578c547275f02c0fa+ed72d204.json +1 -0
context_encoding_model/_tp0_bk0/global_metric_store.json +1051 -0
context_encoding_model/_tp0_bk0/graph.neff +3 -0
context_encoding_model/_tp0_bk0/log-neuron-cc.txt +0 -0
context_encoding_model/_tp0_bk0/metaneff.pb +3 -0
context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.hlo_module.pb +3 -0
context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.neff +3 -0
layout_opt/command.txt +1 -0
layout_opt/graph.neff +3 -0
layout_opt/log-neuron-cc.txt +0 -0
layout_opt/metaneff +982 -0
layout_opt/model/graph.hlo +3 -0
merges.txt +0 -0
model.pt +3 -0
neuron_config.json +43 -0
special_tokens_map.json +31 -0
token_generation_model/_tp0_bk0/command.txt +1 -0
token_generation_model/_tp0_bk0/compile_flags.MODULE_8f245c7816a398e13e79+a9d440f5.json +1 -0
token_generation_model/_tp0_bk0/global_metric_store.json +524 -0
token_generation_model/_tp0_bk0/graph.neff +3 -0
token_generation_model/_tp0_bk0/log-neuron-cc.txt +0 -0
token_generation_model/_tp0_bk0/metaneff.pb +3 -0
token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.hlo_module.pb +3 -0
token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.neff +3 -0
token_generation_model/_tp0_bk0/wrapped_neff.hlo +3 -0
tokenizer.json +3 -0
tokenizer_config.json +247 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+context_encoding_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
+context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.neff filter=lfs diff=lfs merge=lfs -text
+layout_opt/graph.neff filter=lfs diff=lfs merge=lfs -text
+layout_opt/model/graph.hlo filter=lfs diff=lfs merge=lfs -text
+token_generation_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
+token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.neff filter=lfs diff=lfs merge=lfs -text
+token_generation_model/_tp0_bk0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,176 @@

+---
+language:
+- en
+license: apache-2.0
+pipeline_tag: text-generation
+tags:
+- chess
+- neuron
+- aws-trainium
+- vllm
+- optimum-neuron
+- continuous-batching
+base_model: karanps/ChessLM_Qwen3
+---
+# ChessLM Qwen3 - Neuron Traced (AWS Format Structure)
+This is a Neuron-traced version of [karanps/ChessLM_Qwen3](https://huggingface.co/karanps/ChessLM_Qwen3) optimized for AWS Trainium (trn1) and Inferentia (inf2) instances using vLLM with **continuous batching enabled**.
+This model follows the AWS Neuron repository structure with separate directories for compiled artifacts.
+## Model Details
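+- **Base Model**: [karanps/ChessLM_Qwen3](https://huggingface.co/karanps/ChessLM_Qwen3), a Qwen3 8B-class model (36 layers, hidden size 4096, 32 attention heads / 8 KV heads per `config.json`) fine-tuned for chess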
+- **Compilation**: optimum-neuron[vllm]==0.3.0
+- **Compiler Version**: neuronxcc 2.21.33363.0
+- **Target Hardware**: AWS Trainium (trn1) / Inferentia (inf2)
+- **Precision**: BF16
+- **Tensor Parallelism**: 2 cores
+- **Batch Size**: 4 (continuous batching enabled)
+- **Max Sequence Length**: 2048
+## Repository Structure
+This repository follows the AWS Neuron format with organized directories:
+```
+├── context_encoding_model/
+│   └── _tp0_bk0/
+│       ├── command.txt
+│       ├── compile_flags.*.json
+│       ├── global_metric_store.json
+│       ├── graph.neff
+│       ├── log-neuron-cc.txt
+│       ├── metaneff.pb
+│       ├── model.MODULE_*.hlo_module.pb
+│       └── model.MODULE_*.neff
+├── token_generation_model/
+│   └── _tp0_bk0/
+│       ├── command.txt
+│       ├── compile_flags.*.json
+│       ├── global_metric_store.json
+│       ├── graph.neff
+│       ├── log-neuron-cc.txt
+│       ├── metaneff.pb
+│       ├── model.MODULE_*.hlo_module.pb
+│       ├── model.MODULE_*.neff
+│       └── wrapped_neff.hlo
+├── layout_opt/
+│   ├── command.txt
+│   ├── graph.neff
+│   ├── log-neuron-cc.txt
+│   ├── metaneff
+│   └── model/
+│       └── graph.hlo
+├── model.pt (17GB - contains compiled graphs + weights)
+├── config.json
+├── neuron_config.json
+├── WEIGHTS_README.md
+└── tokenizer files
+```
+### Key Files
+- **context_encoding_model/**: Compiled NEFF files for processing initial prompt sequences (up to 2048 tokens)
+- **token_generation_model/**: Compiled NEFF files for autoregressive token generation
+- **layout_opt/**: Layout optimization artifacts from compilation
+- **model.pt**: Main model file containing compiled graphs and embedded weights (17GB)
+- **neuron_config.json**: Neuron compilation configuration
+## Difference from AWS Reference Format
+The AWS Neuron reference models (e.g., `aws-neuron/Qwen3-1.7B-TP2-BS8-SEQ4096`) typically have:
+- A `weights/` directory with separate safetensors files (e.g., `tp0_sharded_checkpoint.safetensors`)
+- A smaller model.pt (e.g., ~100MB) containing just the model structure
+**This model** has:
+- Weights embedded within model.pt (17GB)
+- No `weights/` directory in the repository (Git cannot track an empty directory); `WEIGHTS_README.md` documents where the weights live instead
+This is because models compiled with optimum-neuron[vllm]==0.3.0 bundle their weights with the compiled artifacts: the weights are optimized for Neuron during compilation and stored in `model.pt` alongside the compiled NEFF (Neuron Executable File Format) graphs. This is a valid alternative layout that provides the same functionality.
+## Requirements
+```bash
+pip install "optimum-neuron[vllm]==0.3.0"
+pip install neuronx-distributed --extra-index-url=https://pip.repos.neuron.amazonaws.com
+```
+## Usage
+### Loading the Model
+```python
+from optimum.neuron import NeuronModelForCausalLM
+from transformers import AutoTokenizer
+# Load the model
+model = NeuronModelForCausalLM.from_pretrained("kunhunjon/ChessLM_Qwen3_Trainium_AWS_Format")
+tokenizer = AutoTokenizer.from_pretrained("kunhunjon/ChessLM_Qwen3_Trainium_AWS_Format")
+# Run inference
+prompt = "e2e4"
+inputs = tokenizer(prompt, return_tensors="pt")
+outputs = model.generate(**inputs, max_new_tokens=20)
+result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(result)
+```
+### Hardware Requirements
+- AWS Trainium (trn1.32xlarge, trn1.2xlarge) or Inferentia (inf2) instances
+- At least 2 Neuron cores (as configured during tracing)
+- Minimum 32GB RAM recommended
+## Compilation Details
+This model was traced with the following parameters:
+- `batch_size=4`
+- `sequence_length=2048`
+- `num_cores=2`
+- `auto_cast_type="bf16"`
+- `continuous_batching=True`
+### Compilation Artifacts
+The separate directories contain all compilation artifacts:
+- **NEFF files**: Neuron Executable File Format - the compiled compute graphs
+- **HLO files**: High-Level Operations - intermediate representation
+- **Compilation logs**: Detailed logs from neuronx-cc compiler
+- **Metadata**: Configuration and metrics from compilation
+### Continuous Batching
+This model is compiled with **continuous batching enabled**, which allows vLLM to:
+- Process multiple requests simultaneously with dynamic batch sizes up to 4
+- Optimize throughput by batching requests with different sequence lengths
+- Reduce latency for concurrent inference workloads
+**Note**: On-device sampling is disabled due to a known Neuron runtime limitation when using tensor parallelism with 2 cores. Sampling is handled on the host instead.
+## Compilation Metrics
+- **Total compilation time**: ~8.1 minutes
+- **Token generation model**: 219 seconds
+- **Context encoding model**: 165 seconds
+- **Compiler**: neuronxcc 2.21.33363.0
+- **Model size**: 17GB (with embedded weights)
+## Model Files
+| File | Purpose |
+|------|---------|
+| model.pt | Main model with embedded weights (17GB) |
+| config.json | Base model configuration |
+| neuron_config.json | Neuron compilation settings |
+| tokenizer* | Tokenizer files for text processing |
+| context_encoding_model/ | Compiled graphs for prompt processing |
+| token_generation_model/ | Compiled graphs for token generation |
+| layout_opt/ | Weight layout optimization artifacts |
+## License
+This model is released under the Apache 2.0 license (as declared in the model card metadata), inherited from the base model [karanps/ChessLM_Qwen3](https://huggingface.co/karanps/ChessLM_Qwen3).
+## Citation
+If you use this model, please cite the original ChessLM model and AWS Neuron tools.
+## See Also
+- **Sharded version**: [kunhunjon/ChessLM_Qwen3_Trainium_Sharded](https://huggingface.co/kunhunjon/ChessLM_Qwen3_Trainium_Sharded) - Model split into 9x2GB shards for easier downloading
+- **Standard version**: [kunhunjon/ChessLM_Qwen3_Trainium](https://huggingface.co/kunhunjon/ChessLM_Qwen3_Trainium) - Single model.pt file

WEIGHTS_README.md ADDED Viewed

	@@ -0,0 +1,36 @@

+# Weights Information
+This model contains weights bundled within model.pt (17GB).
+In the AWS Neuron reference format, weights are typically stored separately as:
+- `weights/tp0_sharded_checkpoint.safetensors`
+- `weights/tp1_sharded_checkpoint.safetensors`
+To extract weights to safetensors format, you would need to:
+1. Load the model using optimum-neuron
+2. Extract the state_dict
+3. Convert to safetensors format
+4. Shard by tensor parallel rank
+This is currently not straightforward for compiled Neuron models as the weights
+are embedded in the compiled format.
+## Current Structure
+The model.pt file contains:
+- Compiled graphs (NEFF format)
+- Model weights (optimized for Neuron)
+- Runtime metadata
+The separate directories contain:
+- `context_encoding_model/`: NEFF files for context encoding
+- `token_generation_model/`: NEFF files for token generation
+- `layout_opt/`: Layout optimization artifacts
+## Usage
+Load this model using:
+```python
+from optimum.neuron import NeuronModelForCausalLM
+model = NeuronModelForCausalLM.from_pretrained("path/to/model")
+```

added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

config.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "float32",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token_id": 151643,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

context_encoding_model/_tp0_bk0/command.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ neuronx-cc compile --framework=XLA model.MODULE_e80578c547275f02c0fa+ed72d204.hlo_module.pb --output model.MODULE_e80578c547275f02c0fa+ed72d204.neff --target=trn1 --auto-cast=none --model-type=transformer '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ' -O2 --lnc=1 --logfile=log-neuron-cc.txt --verbose=35

context_encoding_model/_tp0_bk0/compile_flags.MODULE_e80578c547275f02c0fa+ed72d204.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]

context_encoding_model/_tp0_bk0/global_metric_store.json ADDED Viewed

	@@ -0,0 +1,1051 @@

+{
+  "Average": {
+    "tensorizer": {
+      "StaticProfiler::AverageFractalPeUtilization": 99.99919128417969,
+      "StaticProfiler::AveragePartitionUtilization": 99.9390869140625,
+      "StaticProfiler::AveragePeUtilization": 99.99919128417969,
+      "StaticProfiler::LocalizationEfficiency": 52.21323013305664,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 56.50444793701172,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
+    }
+  },
+  "Count": {
+    "tensorizer": {
+      "StaticProfiler::AverageFractalPeUtilization": 1.0,
+      "StaticProfiler::AveragePartitionUtilization": 1.0,
+      "StaticProfiler::AveragePeUtilization": 1.0,
+      "StaticProfiler::LocalizationEfficiency": 1.0,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
+    }
+  },
+  "Sum": {
+    "compiletime": {
+      "AGOrderingAnalysisPass": 0.07080268859863281,
+      "AffinePredicateResolution": 0.001844644546508789,
+      "AliasDependencyElimination": 0.0001308917999267578,
+      "AliasDependencyInduction": 0.012178182601928711,
+      "AliasDependencyReset": 0.027022123336791992,
+      "BFComputeCutting": 0.0061855316162109375,
+      "BirCodeGenLoop": 0.17315936088562012,
+      "CCOpFusion": 0.08119010925292969,
+      "CanonicalizeConv": 0.00011999999696854502,
+      "CanonicalizeDAGForPGTiling": 0.0066263675689697266,
+      "CanonicalizeForTensorizer": 4.400000034365803e-05,
+      "CanonicalizeIR": 0.0033893585205078125,
+      "Canonicalizer": 0.0010000000474974513,
+      "CoalesceCCOp": 0.0032279491424560547,
+      "CommuteConcat": 0.0016355514526367188,
+      "DMALocalityOpt": 0.0021250247955322266,
+      "DMAProfiler": 0.005837917327880859,
+      "DMATilingProfiler": 0.010099172592163086,
+      "DataLocalityOpt": 0.25125575065612793,
+      "DataStreaming": 0.010326147079467773,
+      "DeConcat": 0.002583742141723633,
+      "DeadCodeElimination": 0.0018777847290039063,
+      "DeadStoreElimination": 0.05162811279296875,
+      "DelinearIndices": 0.017117977142333984,
+      "Delinearization": 0.006100893020629883,
+      "DoNothing": 6.961822509765625e-05,
+      "DramToDramTranspose": 0.0047817230224609375,
+      "DumpGraphAndMetadata": 0.008226871490478516,
+      "EliminateDivs": 0.005579710006713867,
+      "ExpandBatchNorm": 0.0024263858795166016,
+      "ExpandISAMacro": 0.004798412322998047,
+      "FactorizeBlkDims": 0.059967756271362305,
+      "FactorizeThreadAxesInFreeDims": 0.003553628921508789,
+      "FlattenMacroLoop": 0.0051920413970947266,
+      "GenericAccessSimplifier": 0.0013842582702636719,
+      "HoistCompute": 1.2999999853491317e-05,
+      "IdentifyCrossPassTensors": 7.300000288523734e-05,
+      "InferInitValue": 0.07938385009765625,
+      "InferIntrinsicOnCC": 0.0170440673828125,
+      "InferNeuronTensor": 0.09969878196716309,
+      "InferNonlocalTensors": 0.11206626892089844,
+      "InferPSumTensor": 0.0999910831451416,
+      "InlineNativeKernels": 0.003079652786254883,
+      "InsertIOTransposes": 0.031575918197631836,
+      "InsertLocalTransposes": 0.01450800895690918,
+      "InsertOffloadedTransposes": 0.010621309280395508,
+      "LICM": 0.006058454513549805,
+      "LateLegalizeInst": 0.009308576583862305,
+      "LateLegalizePostSplit": 0.005577564239501953,
+      "LateLowerReshapeOp": 0.002005338668823242,
+      "LateLowerTensorOp": 0.006224155426025391,
+      "LateNeuronInstComb": 0.026279211044311523,
+      "LayoutPreprocessing": 0.045662879943847656,
+      "LayoutPreprocessingAndAnalysis": 0.08894896507263184,
+      "LayoutRequirementAnalysis": 0.014644384384155273,
+      "LegalizeCCOpLayout": 0.002945423126220703,
+      "LegalizeOpLevelAlias": 0.0015463829040527344,
+      "LegalizePartitionReduce": 0.0025038719177246094,
+      "LegalizeSundaAccess": 0.05164527893066406,
+      "LegalizeSundaMacro": 0.02343463897705078,
+      "LegalizeType": 0.007515668869018555,
+      "LocalLayoutOpt": 0.05471658706665039,
+      "LoopFusion": 0.009645700454711914,
+      "LoopSplitting": 0.0005736351013183594,
+      "LowerBroadcast": 0.0030760765075683594,
+      "LowerCCOpBlockAxis": 0.010100364685058594,
+      "LowerComplexBroadcast": 0.004259347915649414,
+      "LowerIntrinsics": 0.071380615234375,
+      "LowerTensorOp": 0.017409563064575195,
+      "LowerTranspose": 2.716614007949829,
+      "MacroGeneration": 0.15842843055725098,
+      "MaskPropagation": 0.004798412322998047,
+      "MemcastMotion": 3.300000025774352e-05,
+      "MemcpyElimination": 0.14069795608520508,
+      "MutateDataType": 0.0018687248229980469,
+      "NeuronAliasDependencyInduction": 0.0005295276641845703,
+      "NeuronAliasDependencyReset": 0.014295816421508789,
+      "NeuronInstComb": 0.014310121536254883,
+      "NeuronLICM": 0.01824188232421875,
+      "NeuronLoopFusion": 0.03763270378112793,
+      "NeuronLoopInterchange": 0.0033299922943115234,
+      "NeuronSimplifier": 0.02371072769165039,
+      "NeuronSimplifyPredicates": 0.010996103286743164,
+      "NeuronValueNumbering": 0.005862236022949219,
+      "OptimizeAliasedCopyChain": 0.0014340877532958984,
+      "OptimizeNKIKernels": 0.0033910274505615234,
+      "PAGLayoutOpt": 1.3955655097961426,
+      "PComputeCutting": 0.01343226432800293,
+      "PGLayoutTilingPipeline": 2.056190013885498,
+      "PGTiling": 0.29619383811950684,
+      "PadElimination": 0.000553131103515625,
+      "ParAxesAnnotation": 1.3579421043395996,
+      "PartialLoopFusion": 0.04784822463989258,
+      "PartialSimdFusion": 0.1002810001373291,
+      "PenguinizeFunctions": 4.199999966658652e-05,
+      "PerfectLoopNest": 0.0037620067596435547,
+      "PruneFunctions": 3.199999991920777e-05,
+      "RecognizeOpIdiom": 0.0069119930267333984,
+      "Recompute": 0.0003383159637451172,
+      "RelaxPredicates": 0.04035329818725586,
+      "Rematerialization": 0.003230571746826172,
+      "RemoveOptimizationBarriers": 7.599999662488699e-05,
+      "ReshapeWeights": 0.0011525154113769531,
+      "ResolveAccessConflict": 0.0065386295318603516,
+      "ResolveComplicatePredicates": 0.002877473831176758,
+      "RewriteReplicationMatmul": 0.0025200843811035156,
+      "RewriteWeights": 0.0038268566131591797,
+      "SFKVectorizer": 0.48886895179748535,
+      "ScatterMotion": 1.8999999156221747e-05,
+      "SimpleAllReduceTiling": 0.0031387805938720703,
+      "Simplifier": 0.004804134368896484,
+      "SimplifyMacroPredicates": 0.01790642738342285,
+      "SimplifyNeuronTensor": 0.020508527755737305,
+      "SimplifySlice": 0.00145721435546875,
+      "SimplifyTensor": 0.016368389129638672,
+      "SpillPSum": 0.049539804458618164,
+      "SplitAPUnionSets": 0.060128211975097656,
+      "SplitAccGrp": 0.0025734901428222656,
+      "StaticProfiler": 0.006608724594116211,
+      "StaticTransposeLocalTensor": 0.008615732192993164,
+      "SundaISel": 0.06819963455200195,
+      "TCTransform": 0.0016434192657470703,
+      "TensorInitialization": 0.013004541397094727,
+      "TensorOpSimplifier": 0.011576175689697266,
+      "TensorOpTransform": 0.04517507553100586,
+      "TensorizerLegalizationPass": 4.8000001697801054e-05,
+      "TileCCOps": 0.011648893356323242,
+      "TilingProfiler": 0.02406597137451172,
+      "TransformConvOp": 0.004629850387573242,
+      "TritiumFusion": 0.26013898849487305,
+      "ValueNumbering": 0.004456520080566406,
+      "VectorizeDMA": 0.009630918502807617,
+      "VectorizeMatMult": 0.046350955963134766,
+      "VerifySupportedOps": 4.3000000005122274e-05,
+      "WeightCoalescing": 0.0030286312103271484,
+      "ZeroSizeTensorElimination": 0.00014090538024902344,
+      "algsimp": 0.002338999882340431,
+      "batchnorm_expander": 4.099999932805076e-05,
+      "boundary-marker-removal": 1.4000000192027073e-05,
+      "call-inliner": 0.0003630000283010304,
+      "canonicalize-boundary-marker": 1.799999881768599e-05,
+      "collective-stream-id-checker": 6.0999998822808266e-05,
+      "comparison-expander": 0.0004900000058114529,
+      "computation-deduplicator": 5.7999997807201e-05,
+      "config-lowering": 8.399999933317304e-05,
+      "constant-statistics": 0.00037799999699927866,
+      "constant_folding": 0.00025699997786432505,
+      "cse": 5.8999998145736754e-05,
+      "dce": 6.299999949987978e-05,
+      "dot_decomposer": 0.00088900001719594,
+      "dynamic-slice-transpose": 1.2999999853491317e-05,
+      "eliminate-redundant-compare": 0.0002390000008745119,
+      "emit-offloaded-dropout": 4.199999966658652e-05,
+      "flatten-call-graph": 0.000893999997060746,
+      "fuse-send-recv": 6.500000017695129e-05,
+      "hilo-conditional-to-select": 1.5999999959603883e-05,
+      "hilo::LegalizeAlias": 1.3999999282532372e-05,
+      "hilo::NeuronInstCombine": 0.00014899999951012433,
+      "hilo::NeuronOpFusion": 3.099999958067201e-05,
+      "hilo::ReplaceTokenTypeWithU8Pass": 4.8999998398358e-05,
+      "hilo::ScheduleFusion": 2.7000001864507794e-05,
+      "hilo::SixtyFourHack": 7.100000220816582e-05,
+      "hilo::VerifyAliasing": 1.1000000085914508e-05,
+      "hlo-mac-count": 0.0006209999555721879,
+      "instruction-histogram": 0.0005789999850094318,
+      "io-con-pipe-begin": 6.000000212225132e-06,
+      "io-con-pipe-end": 9.999999974752427e-07,
+      "io-layout-normalization": 0.0008500000112690032,
+      "io-statistics": 4.3000000005122274e-05,
+      "legalize-ccops-for-tensorizer": 3.999999989900971e-06,
+      "legalize-compare": 1.2000000424450263e-05,
+      "lower-argminmax-custom-call": 1.2999998943996616e-05,
+      "map-inline": 0.0007699999841861427,
+      "metadata-naming": 5.499999679159373e-05,
+      "mlir::detail::OpToOpPassAdaptor": 0.0006780000403523445,
+      "mlir::hlo::MhloToPyPenguin": 0.003496000077575445,
+      "mlir::mhlo::LowerComplexExtraPass": 0.000299000006634742,
+      "mlir::mhlo::LowerComplexPass": 0.0007789999945089221,
+      "native-to-custom-softmax": 0.0005979999550618231,
+      "native-to-custom-softmax-dx": 0.0005189999938011169,
+      "neuron-hlo-verifier": 0.011839999817311764,
+      "operand_upcaster": 4.099999932805076e-05,
+      "opt-barrier-removal": 0.0004349999944679439,
+      "post-par-pipe-begin": 3.999999989900971e-06,
+      "post-par-pipe-end": 0.0,
+      "post-partition-simplification": 0.0016449999529868364,
+      "pre-par-pipe-begin": 1.9999999949504854e-06,
+      "pre-par-pipe-end": 0.0,
+      "pre-partition-simplification": 0.042899999767541885,
+      "replace-minimum-constant": 0.00044299999717622995,
+      "reshape-mover": 9.600000339560211e-05,
+      "simplify-concat": 0.0001939999929163605,
+      "simplify-while-loops": 7.500000356230885e-05,
+      "transform-variadic-reduce": 7.100000220816582e-05,
+      "tuple-simplifier": 0.00027200000477023423,
+      "unpack-nested-aws-ntwsr": 0.00031300002592615783,
+      "unroll-while-loop": 1.2999999853491317e-05,
+      "zero_sized_hlo_elimination": 0.0007820000173524022
+    },
+    "hilo": {
+      "ConstantSize": 1094.0,
+      "HloInputCount": 402.0,
+      "HloMacCount": 644556259328.0,
+      "HloOutputCount": 73.0,
+      "IfmapSize": 8795039744.0,
+      "OfmapSize": 604587520.0,
+      "OutputsReadFromCount": 0.0,
+      "PassthroughTensorsCount": 0.0,
+      "RedundantOutputCount": 0.0,
+      "Traffic": 2150728192.0
+    },
+    "tensorizer": {
+      "DMATilingProfiler::TotalInstructionsAfterTiling": 95441.0,
+      "StaticProfiler::AifUb": 832.489990234375,
+      "StaticProfiler::ArithmeticIntensityTensorizer": 434.669921875,
+      "StaticProfiler::AverageDmaLength": 1623.94287109375,
+      "StaticProfiler::DDRTransferBytes": 1990484992.0,
+      "StaticProfiler::InternalTransferBytes": 1161470464.0,
+      "StaticProfiler::LoadExpanded": 1195909.0,
+      "StaticProfiler::StoreExpanded": 28288.0,
+      "StaticProfiler::TotalDMAExpanded": 1224197.0,
+      "StaticProfiler::TotalDynamicInstancesCount": 120578.0,
+      "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 120578.0,
+      "StaticProfiler::TotalLNCComm": 0.0,
+      "StaticProfiler::TotalLNCCommTransfer": 0.0,
+      "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
+      "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
+      "TilingProfiler::GenericInstructionsAfterTiling": 129.0,
+      "TilingProfiler::MatMultInstructionsAfterTiling": 63040.0,
+      "TilingProfiler::NumPfTransposes": 13.0,
+      "TilingProfiler::NumPfTransposesForIo": 3.0,
+      "TilingProfiler::NumPfTransposesForLocal": 8.0,
+      "TilingProfiler::NumPfTransposesForNonlocal": 2.0,
+      "TilingProfiler::PfTransposeInstructions": 25889.0,
+      "TilingProfiler::PfTransposeInstructionsForIo": 19040.0,
+      "TilingProfiler::PfTransposeInstructionsForLocal": 5825.0,
+      "TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
+      "TilingProfiler::ReduceInstructionsAfterTiling": 8.0,
+      "TilingProfiler::SimdInstructionsAfterTiling": 3225.0,
+      "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
+      "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
+      "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
+      "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
+      "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
+      "TransformConvOp::conv2d_column_packing": 0.0,
+      "TransformConvOp::conv2d_column_packing_1": 0.0,
+      "TransformConvOp::conv2d_column_packing_io10": 0.0,
+      "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
+    }
+  },
+  "all": {
+    "compiletime": {
+      "algsimp": 0.0021269998978823423,
+      "call-inliner": 0.0003319999959785491,
+      "collective-stream-id-checker": 5.2999999752501026e-05,
+      "comparison-expander": 0.00047400000039488077,
+      "constant-statistics": 0.00037799999699927866,
+      "constant_folding": 0.00022899999748915434,
+      "dce": 5.999999848427251e-05,
+      "dot_decomposer": 0.00088900001719594,
+      "eliminate-redundant-compare": 0.00022499999613501132,
+      "flatten-call-graph": 0.0008660000166855752,
+      "hlo-mac-count": 0.0005499999970197678,
+      "instruction-histogram": 0.0005789999850094318,
+      "io-con-pipe-begin": 6.000000212225132e-06,
+      "io-con-pipe-end": 9.999999974752427e-07,
+      "io-layout-normalization": 0.0008500000112690032,
+      "io-statistics": 4.3000000005122274e-05,
+      "map-inline": 0.0007329999934881926,
+      "native-to-custom-softmax": 0.0005789999850094318,
+      "native-to-custom-softmax-dx": 0.00042799999937415123,
+      "neuron-hlo-verifier": 0.010563000105321407,
+      "opt-barrier-removal": 0.0004349999944679439,
+      "pre-par-pipe-begin": 1.9999999949504854e-06,
+      "pre-par-pipe-end": 0.0,
+      "pre-partition-simplification": 0.042899999767541885,
+      "replace-minimum-constant": 0.0004189999890513718,
+      "reshape-mover": 8.499999967170879e-05,
+      "simplify-while-loops": 6.800000119255856e-05,
+      "tuple-simplifier": 0.00025499999173916876,
+      "unpack-nested-aws-ntwsr": 0.0003000000142492354,
+      "unroll-while-loop": 1.2999999853491317e-05,
+      "zero_sized_hlo_elimination": 0.0007820000173524022
+    }
+  },
+  "sg00": {
+    "compiletime": {
+      "CanonicalizeConv": 3.9999998989515007e-05,
+      "CanonicalizeForTensorizer": 1.4999999621068127e-05,
+      "Canonicalizer": 0.00037799999699927866,
+      "HoistCompute": 1.1000000085914508e-05,
+      "IdentifyCrossPassTensors": 4.999999873689376e-05,
+      "MemcastMotion": 2.700000004551839e-05,
+      "PenguinizeFunctions": 1.8999999156221747e-05,
+      "PruneFunctions": 1.4000000192027073e-05,
+      "RemoveOptimizationBarriers": 5.2999999752501026e-05,
+      "ScatterMotion": 7.999999979801942e-06,
+      "TensorizerLegalizationPass": 2.5999999706982635e-05,
+      "VerifySupportedOps": 1.4000000192027073e-05,
+      "algsimp": 6.600000051548705e-05,
+      "batchnorm_expander": 1.2000000424450263e-05,
+      "boundary-marker-removal": 3.999999989900971e-06,
+      "call-inliner": 9.000000318337698e-06,
+      "canonicalize-boundary-marker": 4.999999873689376e-06,
+      "collective-stream-id-checker": 1.9999999949504854e-06,
+      "comparison-expander": 3.999999989900971e-06,
+      "computation-deduplicator": 9.999999747378752e-06,
+      "config-lowering": 2.8000000384054147e-05,
+      "constant_folding": 9.000000318337698e-06,
+      "cse": 1.2000000424450263e-05,
+      "dce": 9.999999974752427e-07,
+      "dynamic-slice-transpose": 3.999999989900971e-06,
+      "eliminate-redundant-compare": 3.999999989900971e-06,
+      "emit-offloaded-dropout": 1.2999999853491317e-05,
+      "flatten-call-graph": 7.999999979801942e-06,
+      "fuse-send-recv": 1.8999999156221747e-05,
+      "hilo-conditional-to-select": 3.999999989900971e-06,
+      "hilo::LegalizeAlias": 4.999999873689376e-06,
+      "hilo::NeuronInstCombine": 6.299999949987978e-05,
+      "hilo::NeuronOpFusion": 1.2999999853491317e-05,
+      "hilo::ReplaceTokenTypeWithU8Pass": 2.9000000722589903e-05,
+      "hilo::ScheduleFusion": 1.8000000636675395e-05,
+      "hilo::SixtyFourHack": 4.70000013592653e-05,
+      "hilo::VerifyAliasing": 7.000000096013537e-06,
+      "hlo-mac-count": 1.8999999156221747e-05,
+      "legalize-ccops-for-tensorizer": 1.9999999949504854e-06,
+      "legalize-compare": 3.000000106112566e-06,
+      "lower-argminmax-custom-call": 3.999999989900971e-06,
+      "map-inline": 9.999999747378752e-06,
+      "metadata-naming": 1.4999999621068127e-05,
+      "mlir::detail::OpToOpPassAdaptor": 5.2999999752501026e-05,
+      "mlir::hlo::MhloToPyPenguin": 0.0010730000212788582,
+      "mlir::mhlo::LowerComplexExtraPass": 9.000000136438757e-05,
+      "mlir::mhlo::LowerComplexPass": 0.0004689999914262444,
+      "native-to-custom-softmax": 4.999999873689376e-06,
+      "native-to-custom-softmax-dx": 6.199999916134402e-05,
+      "neuron-hlo-verifier": 0.00036100001307204366,
+      "operand_upcaster": 1.2000000424450263e-05,
+      "post-par-pipe-begin": 9.999999974752427e-07,
+      "post-par-pipe-end": 0.0,
+      "post-partition-simplification": 0.0005000000237487257,
+      "replace-minimum-constant": 7.000000096013537e-06,
+      "reshape-mover": 3.999999989900971e-06,
+      "simplify-concat": 4.8999998398358e-05,
+      "simplify-while-loops": 1.9999999949504854e-06,
+      "transform-variadic-reduce": 9.000000318337698e-06,
+      "tuple-simplifier": 4.999999873689376e-06,
+      "unpack-nested-aws-ntwsr": 3.000000106112566e-06,
+      "unroll-while-loop": 0.0
+    },
+    "hilo": {
+      "ArithmeticIntensity": 157.8584747314453,
+      "ConstantSize": 1094.0,
+      "HloInputCount": 402.0,
+      "HloMacCount": 60129542144.0,
+      "HloOutputCount": 73.0,
+      "IfmapSize": 8795039744.0,
+      "OfmapSize": 604587520.0,
+      "OutputsReadFromCount": 0.0,
+      "PassthroughTensorsCount": 0.0,
+      "RedundantOutputCount": 0.0,
+      "Traffic": 761815872.0
+    }
+  },
+  "sg0000": {
+    "compiletime": {
+      "AGOrderingAnalysisPass": 0.04314303398132324,
+      "AffinePredicateResolution": 0.0014219284057617188,
+      "AliasDependencyElimination": 0.00013756752014160156,
+      "AliasDependencyInduction": 0.009345054626464844,
+      "AliasDependencyReset": 0.023563146591186523,
+      "BFComputeCutting": 0.0037696361541748047,
+      "BirCodeGenLoop": 0.13709354400634766,
+      "CCOpFusion": 0.061038970947265625,
+      "CanonicalizeDAGForPGTiling": 0.0033833980560302734,
+      "CanonicalizeIR": 0.0029594898223876953,
+      "CoalesceCCOp": 0.0030164718627929688,
+      "CommuteConcat": 0.0010838508605957031,
+      "DMALocalityOpt": 0.002034425735473633,
+      "DMAProfiler": 0.005063295364379883,
+      "DMATilingProfiler": 0.0045011043548583984,
+      "DataLocalityOpt": 0.10246086120605469,
+      "DataStreaming": 0.008306264877319336,
+      "DeConcat": 0.001977682113647461,
+      "DeadCodeElimination": 0.0022068023681640625,
+      "DeadStoreElimination": 0.028951644897460938,
+      "DelinearIndices": 0.009624004364013672,
+      "Delinearization": 0.0037741661071777344,
+      "DoNothing": 7.152557373046875e-05,
+      "DramToDramTranspose": 0.002131938934326172,
+      "DumpGraphAndMetadata": 0.007627725601196289,
+      "EliminateDivs": 0.0052356719970703125,
+      "ExpandBatchNorm": 0.0017452239990234375,
+      "ExpandISAMacro": 0.003320455551147461,
+      "FactorizeBlkDims": 0.03702902793884277,
+      "FactorizeThreadAxesInFreeDims": 0.002062082290649414,
+      "FlattenMacroLoop": 0.003297090530395508,
+      "GenericAccessSimplifier": 0.0008604526519775391,
+      "InferInitValue": 0.038228511810302734,
+      "InferIntrinsicOnCC": 0.010860443115234375,
+      "InferNeuronTensor": 0.05763053894042969,
+      "InferNonlocalTensors": 0.2222437858581543,
+      "InferPSumTensor": 0.0829617977142334,
+      "InlineNativeKernels": 0.0021293163299560547,
+      "InsertIOTransposes": 0.016276836395263672,
+      "InsertLocalTransposes": 0.009328603744506836,
+      "InsertOffloadedTransposes": 0.009638786315917969,
+      "LICM": 0.003412008285522461,
+      "LateLegalizeInst": 0.009476900100708008,
+      "LateLegalizePostSplit": 0.00451970100402832,
+      "LateLowerReshapeOp": 0.001276254653930664,
+      "LateLowerTensorOp": 0.0059833526611328125,
+      "LateNeuronInstComb": 0.017744779586791992,
+      "LayoutPreprocessing": 0.03296685218811035,
+      "LayoutPreprocessingAndAnalysis": 0.1081993579864502,
+      "LayoutRequirementAnalysis": 0.00911259651184082,
+      "LegalizeCCOpLayout": 0.002354860305786133,
+      "LegalizeOpLevelAlias": 0.001096487045288086,
+      "LegalizePartitionReduce": 0.0018439292907714844,
+      "LegalizeSundaAccess": 0.04656553268432617,
+      "LegalizeSundaMacro": 0.011276006698608398,
+      "LegalizeType": 0.004993438720703125,
+      "LocalLayoutOpt": 0.03078746795654297,
+      "LoopFusion": 0.00582575798034668,
+      "LoopSplitting": 0.0003783702850341797,
+      "LowerBroadcast": 0.0020279884338378906,
+      "LowerCCOpBlockAxis": 0.006699562072753906,
+      "LowerComplexBroadcast": 0.002343893051147461,
+      "LowerIntrinsics": 0.050733089447021484,
+      "LowerTensorOp": 0.013502359390258789,
+      "LowerTranspose": 0.018018484115600586,
+      "MacroGeneration": 0.10082674026489258,
+      "MaskPropagation": 0.005489349365234375,
+      "MemcpyElimination": 0.12442874908447266,
+      "MutateDataType": 0.0011963844299316406,
+      "NeuronAliasDependencyInduction": 0.00037288665771484375,
+      "NeuronAliasDependencyReset": 0.012840986251831055,
+      "NeuronInstComb": 0.01097249984741211,
+      "NeuronLICM": 0.01376652717590332,
+      "NeuronLoopFusion": 0.017533540725708008,
+      "NeuronLoopInterchange": 0.002491474151611328,
+      "NeuronSimplifier": 0.013012886047363281,
+      "NeuronSimplifyPredicates": 0.027909040451049805,
+      "NeuronValueNumbering": 0.0057392120361328125,
+      "OptimizeAliasedCopyChain": 0.0006589889526367188,
+      "OptimizeNKIKernels": 0.0025641918182373047,
+      "PAGLayoutOpt": 0.24210047721862793,
+      "PComputeCutting": 0.008717536926269531,
+      "PGLayoutTilingPipeline": 0.9638533592224121,
+      "PGTiling": 0.20569705963134766,
+      "PadElimination": 0.0004475116729736328,
+      "ParAxesAnnotation": 0.22005105018615723,
+      "PartialLoopFusion": 0.047158002853393555,
+      "PartialSimdFusion": 0.07779932022094727,
+      "PerfectLoopNest": 0.0020694732666015625,
+      "RecognizeOpIdiom": 0.0045545101165771484,
+      "Recompute": 0.00025963783264160156,
+      "RelaxPredicates": 0.004879951477050781,
+      "Rematerialization": 0.005000591278076172,
+      "ReshapeWeights": 0.0006878376007080078,
+      "ResolveAccessConflict": 0.0038623809814453125,
+      "ResolveComplicatePredicates": 0.0014736652374267578,
+      "RewriteReplicationMatmul": 0.0016112327575683594,
+      "RewriteWeights": 0.002537965774536133,
+      "SFKVectorizer": 0.42055439949035645,
+      "SimpleAllReduceTiling": 0.002585887908935547,
+      "Simplifier": 0.0035560131072998047,
+      "SimplifyMacroPredicates": 0.02046680450439453,
+      "SimplifyNeuronTensor": 0.023018360137939453,
+      "SimplifySlice": 0.0010895729064941406,
+      "SimplifyTensor": 0.008915424346923828,
+      "SpillPSum": 0.032480716705322266,
+      "SplitAPUnionSets": 0.11712884902954102,
+      "SplitAccGrp": 0.0018432140350341797,
+      "StaticProfiler": 0.005769252777099609,
+      "StaticTransposeLocalTensor": 0.005156517028808594,
+      "SundaISel": 0.04348421096801758,
+      "TCTransform": 0.0010843276977539063,
+      "TensorInitialization": 0.03327298164367676,
+      "TensorOpSimplifier": 0.008444547653198242,
+      "TensorOpTransform": 0.025008678436279297,
+      "TileCCOps": 0.008762598037719727,
+      "TilingProfiler": 0.015444755554199219,
+      "TransformConvOp": 0.003448963165283203,
+      "TritiumFusion": 0.09676671028137207,
+      "ValueNumbering": 0.0035293102264404297,
+      "VectorizeDMA": 0.008908271789550781,
+      "VectorizeMatMult": 0.025291919708251953,
+      "WeightCoalescing": 0.00263214111328125,
+      "ZeroSizeTensorElimination": 0.00011610984802246094
+    },
+    "tensorizer": {
+      "DMATilingProfiler::TotalInstructionsAfterTiling": 15106.0,
+      "StaticProfiler::AifUb": 201.88758850097656,
+      "StaticProfiler::ArithmeticIntensityTensorizer": 641.6123657226563,
+      "StaticProfiler::AverageDmaLength": 3126.786865234375,
+      "StaticProfiler::AverageFractalPeUtilization": 99.9704360961914,
+      "StaticProfiler::AveragePartitionUtilization": 99.9277572631836,
+      "StaticProfiler::AveragePeUtilization": 99.87899780273438,
+      "StaticProfiler::DDRTransferBytes": 231802112.0,
+      "StaticProfiler::InternalTransferBytes": 335020032.0,
+      "StaticProfiler::LoadExpanded": 43652.0,
+      "StaticProfiler::LocalizationEfficiency": 317.8067321777344,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 425.62677001953125,
+      "StaticProfiler::StoreExpanded": 24065.0,
+      "StaticProfiler::TotalDMAExpanded": 67717.0,
+      "StaticProfiler::TotalDynamicInstancesCount": 24368.0,
+      "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 24356.0,
+      "StaticProfiler::TotalLNCComm": 0.0,
+      "StaticProfiler::TotalLNCCommTransfer": 0.0,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
+      "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
+      "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
+      "TilingProfiler::GenericInstructionsAfterTiling": 192.0,
+      "TilingProfiler::MatMultInstructionsAfterTiling": 7184.0,
+      "TilingProfiler::NumPfTransposes": 8.0,
+      "TilingProfiler::NumPfTransposesForIo": 0.0,
+      "TilingProfiler::NumPfTransposesForLocal": 6.0,
+      "TilingProfiler::NumPfTransposesForNonlocal": 2.0,
+      "TilingProfiler::PfTransposeInstructions": 5568.0,
+      "TilingProfiler::PfTransposeInstructionsForIo": 0.0,
+      "TilingProfiler::PfTransposeInstructionsForLocal": 4800.0,
+      "TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
+      "TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
+      "TilingProfiler::SimdInstructionsAfterTiling": 1635.0,
+      "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
+      "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
+      "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
+      "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
+      "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
+      "TransformConvOp::conv2d_column_packing": 0.0,
+      "TransformConvOp::conv2d_column_packing_1": 0.0,
+      "TransformConvOp::conv2d_column_packing_io10": 0.0,
+      "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
+    }
+  },
+  "sg0001": {
+    "compiletime": {
+      "AGOrderingAnalysisPass": 0.04393625259399414,
+      "AffinePredicateResolution": 0.001329183578491211,
+      "AliasDependencyElimination": 0.00010919570922851563,
+      "AliasDependencyInduction": 0.009439468383789063,
+      "AliasDependencyReset": 0.02581024169921875,
+      "BFComputeCutting": 0.0038251876831054688,
+      "BirCodeGenLoop": 0.10795426368713379,
+      "CCOpFusion": 0.05731463432312012,
+      "CanonicalizeDAGForPGTiling": 0.0033617019653320313,
+      "CanonicalizeIR": 0.0017633438110351563,
+      "CoalesceCCOp": 0.0020570755004882813,
+      "CommuteConcat": 0.0011479854583740234,
+      "DMALocalityOpt": 0.0014009475708007813,
+      "DMAProfiler": 0.0043773651123046875,
+      "DMATilingProfiler": 0.005214691162109375,
+      "DataLocalityOpt": 0.14134621620178223,
+      "DataStreaming": 0.006264209747314453,
+      "DeConcat": 0.0018208026885986328,
+      "DeadCodeElimination": 0.0017764568328857422,
+      "DeadStoreElimination": 0.03261542320251465,
+      "DelinearIndices": 0.011170148849487305,
+      "Delinearization": 0.006006479263305664,
+      "DoNothing": 7.104873657226563e-05,
+      "DramToDramTranspose": 0.002429485321044922,
+      "DumpGraphAndMetadata": 0.006948947906494141,
+      "EliminateDivs": 0.0039048194885253906,
+      "ExpandBatchNorm": 0.0020303726196289063,
+      "ExpandISAMacro": 0.0029213428497314453,
+      "FactorizeBlkDims": 0.028472423553466797,
+      "FactorizeThreadAxesInFreeDims": 0.002131938934326172,
+      "FlattenMacroLoop": 0.0029516220092773438,
+      "GenericAccessSimplifier": 0.0009126663208007813,
+      "InferInitValue": 0.045678138732910156,
+      "InferIntrinsicOnCC": 0.01059865951538086,
+      "InferNeuronTensor": 0.06629371643066406,
+      "InferNonlocalTensors": 0.03510117530822754,
+      "InferPSumTensor": 0.06450939178466797,
+      "InlineNativeKernels": 0.0018451213836669922,
+      "InsertIOTransposes": 0.026484012603759766,
+      "InsertLocalTransposes": 0.008344173431396484,
+      "InsertOffloadedTransposes": 0.0071446895599365234,
+      "LICM": 0.0036759376525878906,
+      "LateLegalizeInst": 0.004426479339599609,
+      "LateLegalizePostSplit": 0.0032634735107421875,
+      "LateLowerReshapeOp": 0.0013735294342041016,
+      "LateLowerTensorOp": 0.005624532699584961,
+      "LateNeuronInstComb": 0.017444849014282227,
+      "LayoutPreprocessing": 0.03893160820007324,
+      "LayoutPreprocessingAndAnalysis": 0.07227110862731934,
+      "LayoutRequirementAnalysis": 0.009693145751953125,
+      "LegalizeCCOpLayout": 0.0020418167114257813,
+      "LegalizeOpLevelAlias": 0.001138448715209961,
+      "LegalizePartitionReduce": 0.0015141963958740234,
+      "LegalizeSundaAccess": 0.019158363342285156,
+      "LegalizeSundaMacro": 0.01407480239868164,
+      "LegalizeType": 0.005761623382568359,
+      "LocalLayoutOpt": 0.039359331130981445,
+      "LoopFusion": 0.006695985794067383,
+      "LoopSplitting": 0.0003750324249267578,
+      "LowerBroadcast": 0.0017099380493164063,
+      "LowerCCOpBlockAxis": 0.006280422210693359,
+      "LowerComplexBroadcast": 0.0021283626556396484,
+      "LowerIntrinsics": 0.051717519760131836,
+      "LowerTensorOp": 0.012630462646484375,
+      "LowerTranspose": 0.018566608428955078,
+      "MacroGeneration": 0.10026049613952637,
+      "MaskPropagation": 0.003166675567626953,
+      "MemcpyElimination": 0.11932373046875,
+      "MutateDataType": 0.0012645721435546875,
+      "NeuronAliasDependencyInduction": 0.0003998279571533203,
+      "NeuronAliasDependencyReset": 0.012525796890258789,
+      "NeuronInstComb": 0.009615182876586914,
+      "NeuronLICM": 0.010987997055053711,
+      "NeuronLoopFusion": 0.02326488494873047,
+      "NeuronLoopInterchange": 0.002103090286254883,
+      "NeuronSimplifier": 0.014336109161376953,
+      "NeuronSimplifyPredicates": 0.008521318435668945,
+      "NeuronValueNumbering": 0.0033826828002929688,
+      "OptimizeAliasedCopyChain": 0.0006175041198730469,
+      "OptimizeNKIKernels": 0.0019922256469726563,
+      "PAGLayoutOpt": 0.4941246509552002,
+      "PComputeCutting": 0.007924079895019531,
+      "PGLayoutTilingPipeline": 0.9564568996429443,
+      "PGTiling": 0.1994781494140625,
+      "PadElimination": 0.00040078163146972656,
+      "ParAxesAnnotation": 0.4647810459136963,
+      "PartialLoopFusion": 0.022934675216674805,
+      "PartialSimdFusion": 0.0667269229888916,
+      "PerfectLoopNest": 0.002288341522216797,
+      "RecognizeOpIdiom": 0.004983663558959961,
+      "Recompute": 0.00026607513427734375,
+      "RelaxPredicates": 0.003928184509277344,
+      "Rematerialization": 0.0023522377014160156,
+      "ReshapeWeights": 0.0006775856018066406,
+      "ResolveAccessConflict": 0.006055116653442383,
+      "ResolveComplicatePredicates": 0.0013859272003173828,
+      "RewriteReplicationMatmul": 0.0016639232635498047,
+      "RewriteWeights": 0.0022728443145751953,
+      "SFKVectorizer": 0.2814767360687256,
+      "SimpleAllReduceTiling": 0.0020284652709960938,
+      "Simplifier": 0.0032384395599365234,
+      "SimplifyMacroPredicates": 0.0071146488189697266,
+      "SimplifyNeuronTensor": 0.014531373977661133,
+      "SimplifySlice": 0.0009911060333251953,
+      "SimplifyTensor": 0.009533405303955078,
+      "SpillPSum": 0.02969837188720703,
+      "SplitAPUnionSets": 0.03618764877319336,
+      "SplitAccGrp": 0.0015993118286132813,
+      "StaticProfiler": 0.004440784454345703,
+      "StaticTransposeLocalTensor": 0.0054094791412353516,
+      "SundaISel": 0.04047369956970215,
+      "TCTransform": 0.0010793209075927734,
+      "TensorInitialization": 0.007404804229736328,
+      "TensorOpSimplifier": 0.008566856384277344,
+      "TensorOpTransform": 0.03130841255187988,
+      "TileCCOps": 0.008295297622680664,
+      "TilingProfiler": 0.01665639877319336,
+      "TransformConvOp": 0.0034193992614746094,
+      "TritiumFusion": 0.13472461700439453,
+      "ValueNumbering": 0.002829313278198242,
+      "VectorizeDMA": 0.002307891845703125,
+      "VectorizeMatMult": 0.02826690673828125,
+      "WeightCoalescing": 0.0019905567169189453,
+      "ZeroSizeTensorElimination": 0.00034880638122558594
+    },
+    "tensorizer": {
+      "DMATilingProfiler::TotalInstructionsAfterTiling": 34504.0,
+      "StaticProfiler::AifUb": 1543.150390625,
+      "StaticProfiler::ArithmeticIntensityTensorizer": 599.2274169921875,
+      "StaticProfiler::AverageDmaLength": 1330.70458984375,
+      "StaticProfiler::AverageFractalPeUtilization": 100.0,
+      "StaticProfiler::AveragePartitionUtilization": 99.83068084716797,
+      "StaticProfiler::AveragePeUtilization": 100.0,
+      "StaticProfiler::DDRTransferBytes": 769721344.0,
+      "StaticProfiler::InternalTransferBytes": 414711808.0,
+      "StaticProfiler::LoadExpanded": 545409.0,
+      "StaticProfiler::LocalizationEfficiency": 38.8314323425293,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 42.54035186767578,
+      "StaticProfiler::StoreExpanded": 30721.0,
+      "StaticProfiler::TotalDMAExpanded": 576130.0,
+      "StaticProfiler::TotalDynamicInstancesCount": 47251.0,
+      "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 47251.0,
+      "StaticProfiler::TotalLNCComm": 0.0,
+      "StaticProfiler::TotalLNCCommTransfer": 0.0,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
+      "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
+      "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
+      "TilingProfiler::GenericInstructionsAfterTiling": 128.0,
+      "TilingProfiler::MatMultInstructionsAfterTiling": 25600.0,
+      "TilingProfiler::NumPfTransposes": 9.0,
+      "TilingProfiler::NumPfTransposesForIo": 3.0,
+      "TilingProfiler::NumPfTransposesForLocal": 4.0,
+      "TilingProfiler::NumPfTransposesForNonlocal": 2.0,
+      "TilingProfiler::PfTransposeInstructions": 5856.0,
+      "TilingProfiler::PfTransposeInstructionsForIo": 544.0,
+      "TilingProfiler::PfTransposeInstructionsForLocal": 4288.0,
+      "TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
+      "TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
+      "TilingProfiler::SimdInstructionsAfterTiling": 1923.0,
+      "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
+      "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
+      "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
+      "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
+      "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
+      "TransformConvOp::conv2d_column_packing": 0.0,
+      "TransformConvOp::conv2d_column_packing_1": 0.0,
+      "TransformConvOp::conv2d_column_packing_io10": 0.0,
+      "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
+    }
+  },
+  "sg0002": {
+    "compiletime": {
+      "AGOrderingAnalysisPass": 0.07080268859863281,
+      "AffinePredicateResolution": 0.001844644546508789,
+      "AliasDependencyElimination": 0.0001308917999267578,
+      "AliasDependencyInduction": 0.012178182601928711,
+      "AliasDependencyReset": 0.027022123336791992,
+      "BFComputeCutting": 0.0061855316162109375,
+      "BirCodeGenLoop": 0.17315936088562012,
+      "CCOpFusion": 0.08119010925292969,
+      "CanonicalizeDAGForPGTiling": 0.0066263675689697266,
+      "CanonicalizeIR": 0.0033893585205078125,
+      "CoalesceCCOp": 0.0032279491424560547,
+      "CommuteConcat": 0.0016355514526367188,
+      "DMALocalityOpt": 0.0021250247955322266,
+      "DMAProfiler": 0.005837917327880859,
+      "DMATilingProfiler": 0.010099172592163086,
+      "DataLocalityOpt": 0.25125575065612793,
+      "DataStreaming": 0.010326147079467773,
+      "DeConcat": 0.002583742141723633,
+      "DeadCodeElimination": 0.0018777847290039063,
+      "DeadStoreElimination": 0.05162811279296875,
+      "DelinearIndices": 0.017117977142333984,
+      "Delinearization": 0.006100893020629883,
+      "DoNothing": 6.961822509765625e-05,
+      "DramToDramTranspose": 0.0047817230224609375,
+      "DumpGraphAndMetadata": 0.008226871490478516,
+      "EliminateDivs": 0.005579710006713867,
+      "ExpandBatchNorm": 0.0024263858795166016,
+      "ExpandISAMacro": 0.004798412322998047,
+      "FactorizeBlkDims": 0.059967756271362305,
+      "FactorizeThreadAxesInFreeDims": 0.003553628921508789,
+      "FlattenMacroLoop": 0.0051920413970947266,
+      "GenericAccessSimplifier": 0.0013842582702636719,
+      "InferInitValue": 0.07938385009765625,
+      "InferIntrinsicOnCC": 0.0170440673828125,
+      "InferNeuronTensor": 0.09969878196716309,
+      "InferNonlocalTensors": 0.11206626892089844,
+      "InferPSumTensor": 0.0999910831451416,
+      "InlineNativeKernels": 0.003079652786254883,
+      "InsertIOTransposes": 0.031575918197631836,
+      "InsertLocalTransposes": 0.01450800895690918,
+      "InsertOffloadedTransposes": 0.010621309280395508,
+      "LICM": 0.006058454513549805,
+      "LateLegalizeInst": 0.009308576583862305,
+      "LateLegalizePostSplit": 0.005577564239501953,
+      "LateLowerReshapeOp": 0.002005338668823242,
+      "LateLowerTensorOp": 0.006224155426025391,
+      "LateNeuronInstComb": 0.026279211044311523,
+      "LayoutPreprocessing": 0.045662879943847656,
+      "LayoutPreprocessingAndAnalysis": 0.08894896507263184,
+      "LayoutRequirementAnalysis": 0.014644384384155273,
+      "LegalizeCCOpLayout": 0.002945423126220703,
+      "LegalizeOpLevelAlias": 0.0015463829040527344,
+      "LegalizePartitionReduce": 0.0025038719177246094,
+      "LegalizeSundaAccess": 0.05164527893066406,
+      "LegalizeSundaMacro": 0.02343463897705078,
+      "LegalizeType": 0.007515668869018555,
+      "LocalLayoutOpt": 0.05471658706665039,
+      "LoopFusion": 0.009645700454711914,
+      "LoopSplitting": 0.0005736351013183594,
+      "LowerBroadcast": 0.0030760765075683594,
+      "LowerCCOpBlockAxis": 0.010100364685058594,
+      "LowerComplexBroadcast": 0.004259347915649414,
+      "LowerIntrinsics": 0.071380615234375,
+      "LowerTensorOp": 0.017409563064575195,
+      "LowerTranspose": 2.716614007949829,
+      "MacroGeneration": 0.15842843055725098,
+      "MaskPropagation": 0.004798412322998047,
+      "MemcpyElimination": 0.14069795608520508,
+      "MutateDataType": 0.0018687248229980469,
+      "NeuronAliasDependencyInduction": 0.0005295276641845703,
+      "NeuronAliasDependencyReset": 0.014295816421508789,
+      "NeuronInstComb": 0.014310121536254883,
+      "NeuronLICM": 0.01824188232421875,
+      "NeuronLoopFusion": 0.03763270378112793,
+      "NeuronLoopInterchange": 0.0033299922943115234,
+      "NeuronSimplifier": 0.02371072769165039,
+      "NeuronSimplifyPredicates": 0.010996103286743164,
+      "NeuronValueNumbering": 0.005862236022949219,
+      "OptimizeAliasedCopyChain": 0.0014340877532958984,
+      "OptimizeNKIKernels": 0.0033910274505615234,
+      "PAGLayoutOpt": 1.3955655097961426,
+      "PComputeCutting": 0.01343226432800293,
+      "PGLayoutTilingPipeline": 2.056190013885498,
+      "PGTiling": 0.29619383811950684,
+      "PadElimination": 0.000553131103515625,
+      "ParAxesAnnotation": 1.3579421043395996,
+      "PartialLoopFusion": 0.04784822463989258,
+      "PartialSimdFusion": 0.1002810001373291,
+      "PerfectLoopNest": 0.0037620067596435547,
+      "RecognizeOpIdiom": 0.0069119930267333984,
+      "Recompute": 0.0003383159637451172,
+      "RelaxPredicates": 0.04035329818725586,
+      "Rematerialization": 0.003230571746826172,
+      "ReshapeWeights": 0.0011525154113769531,
+      "ResolveAccessConflict": 0.0065386295318603516,
+      "ResolveComplicatePredicates": 0.002877473831176758,
+      "RewriteReplicationMatmul": 0.0025200843811035156,
+      "RewriteWeights": 0.0038268566131591797,
+      "SFKVectorizer": 0.48886895179748535,
+      "SimpleAllReduceTiling": 0.0031387805938720703,
+      "Simplifier": 0.004804134368896484,
+      "SimplifyMacroPredicates": 0.01790642738342285,
+      "SimplifyNeuronTensor": 0.020508527755737305,
+      "SimplifySlice": 0.00145721435546875,
+      "SimplifyTensor": 0.016368389129638672,
+      "SpillPSum": 0.049539804458618164,
+      "SplitAPUnionSets": 0.060128211975097656,
+      "SplitAccGrp": 0.0025734901428222656,
+      "StaticProfiler": 0.006608724594116211,
+      "StaticTransposeLocalTensor": 0.008615732192993164,
+      "SundaISel": 0.06819963455200195,
+      "TCTransform": 0.0016434192657470703,
+      "TensorInitialization": 0.013004541397094727,
+      "TensorOpSimplifier": 0.011576175689697266,
+      "TensorOpTransform": 0.04517507553100586,
+      "TileCCOps": 0.011648893356323242,
+      "TilingProfiler": 0.02406597137451172,
+      "TransformConvOp": 0.004629850387573242,
+      "TritiumFusion": 0.26013898849487305,
+      "ValueNumbering": 0.004456520080566406,
+      "VectorizeDMA": 0.009630918502807617,
+      "VectorizeMatMult": 0.046350955963134766,
+      "WeightCoalescing": 0.0030286312103271484,
+      "ZeroSizeTensorElimination": 0.00014090538024902344
+    },
+    "tensorizer": {
+      "DMATilingProfiler::TotalInstructionsAfterTiling": 95441.0,
+      "StaticProfiler::AifUb": 832.489990234375,
+      "StaticProfiler::ArithmeticIntensityTensorizer": 434.669921875,
+      "StaticProfiler::AverageDmaLength": 1623.94287109375,
+      "StaticProfiler::AverageFractalPeUtilization": 99.99919128417969,
+      "StaticProfiler::AveragePartitionUtilization": 99.9390869140625,
+      "StaticProfiler::AveragePeUtilization": 99.99919128417969,
+      "StaticProfiler::DDRTransferBytes": 1990484992.0,
+      "StaticProfiler::InternalTransferBytes": 1161470464.0,
+      "StaticProfiler::LoadExpanded": 1195909.0,
+      "StaticProfiler::LocalizationEfficiency": 52.21323013305664,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 56.50444793701172,
+      "StaticProfiler::StoreExpanded": 28288.0,
+      "StaticProfiler::TotalDMAExpanded": 1224197.0,
+      "StaticProfiler::TotalDynamicInstancesCount": 120578.0,
+      "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 120578.0,
+      "StaticProfiler::TotalLNCComm": 0.0,
+      "StaticProfiler::TotalLNCCommTransfer": 0.0,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
+      "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
+      "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
+      "TilingProfiler::GenericInstructionsAfterTiling": 129.0,
+      "TilingProfiler::MatMultInstructionsAfterTiling": 63040.0,
+      "TilingProfiler::NumPfTransposes": 13.0,
+      "TilingProfiler::NumPfTransposesForIo": 3.0,
+      "TilingProfiler::NumPfTransposesForLocal": 8.0,
+      "TilingProfiler::NumPfTransposesForNonlocal": 2.0,
+      "TilingProfiler::PfTransposeInstructions": 25889.0,
+      "TilingProfiler::PfTransposeInstructionsForIo": 19040.0,
+      "TilingProfiler::PfTransposeInstructionsForLocal": 5825.0,
+      "TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
+      "TilingProfiler::ReduceInstructionsAfterTiling": 8.0,
+      "TilingProfiler::SimdInstructionsAfterTiling": 3225.0,
+      "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
+      "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
+      "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
+      "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
+      "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
+      "TransformConvOp::conv2d_column_packing": 0.0,
+      "TransformConvOp::conv2d_column_packing_1": 0.0,
+      "TransformConvOp::conv2d_column_packing_io10": 0.0,
+      "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
+    }
+  },
+  "sg01": {
+    "compiletime": {
+      "CanonicalizeConv": 3.000000106112566e-06,
+      "CanonicalizeForTensorizer": 1.2999999853491317e-05,
+      "Canonicalizer": 0.00024300000222865492,
+      "HoistCompute": 1.9999999949504854e-06,
+      "IdentifyCrossPassTensors": 1.1000000085914508e-05,
+      "MemcastMotion": 6.000000212225132e-06,
+      "PenguinizeFunctions": 1.2000000424450263e-05,
+      "PruneFunctions": 9.999999747378752e-06,
+      "RemoveOptimizationBarriers": 1.1000000085914508e-05,
+      "ScatterMotion": 0.0,
+      "TensorizerLegalizationPass": 1.2999999853491317e-05,
+      "VerifySupportedOps": 1.2000000424450263e-05,
+      "algsimp": 5.900000178371556e-05,
+      "batchnorm_expander": 1.2000000424450263e-05,
+      "boundary-marker-removal": 3.999999989900971e-06,
+      "call-inliner": 9.000000318337698e-06,
+      "canonicalize-boundary-marker": 4.999999873689376e-06,
+      "collective-stream-id-checker": 3.000000106112566e-06,
+      "comparison-expander": 3.999999989900971e-06,
+      "computation-deduplicator": 1.5999999959603883e-05,
+      "config-lowering": 2.2000000171829015e-05,
+      "constant_folding": 7.999999979801942e-06,
+      "cse": 2.9999999242136255e-05,
+      "dce": 9.999999974752427e-07,
+      "dynamic-slice-transpose": 3.999999989900971e-06,
+      "eliminate-redundant-compare": 3.999999989900971e-06,
+      "emit-offloaded-dropout": 1.2999999853491317e-05,
+      "flatten-call-graph": 7.999999979801942e-06,
+      "fuse-send-recv": 1.8999999156221747e-05,
+      "hilo-conditional-to-select": 4.999999873689376e-06,
+      "hilo::LegalizeAlias": 3.999999989900971e-06,
+      "hilo::NeuronInstCombine": 4.600000102072954e-05,
+      "hilo::NeuronOpFusion": 1.8000000636675395e-05,
+      "hilo::ReplaceTokenTypeWithU8Pass": 1.1000000085914508e-05,
+      "hilo::ScheduleFusion": 7.000000096013537e-06,
+      "hilo::SixtyFourHack": 1.2000000424450263e-05,
+      "hilo::VerifyAliasing": 1.9999999949504854e-06,
+      "hlo-mac-count": 1.9999999494757503e-05,
+      "legalize-ccops-for-tensorizer": 9.999999974752427e-07,
+      "legalize-compare": 3.999999989900971e-06,
+      "lower-argminmax-custom-call": 3.999999989900971e-06,
+      "map-inline": 1.1000000085914508e-05,
+      "metadata-naming": 1.700000029813964e-05,
+      "mlir::detail::OpToOpPassAdaptor": 0.0006070000235922635,
+      "mlir::hlo::MhloToPyPenguin": 0.0010010000551119447,
+      "mlir::mhlo::LowerComplexExtraPass": 8.099999831756577e-05,
+      "mlir::mhlo::LowerComplexPass": 0.00011500000255182385,
+      "native-to-custom-softmax": 4.999999873689376e-06,
+      "native-to-custom-softmax-dx": 9.999999747378752e-06,
+      "neuron-hlo-verifier": 0.0003809999907389283,
+      "operand_upcaster": 1.2000000424450263e-05,
+      "post-par-pipe-begin": 9.999999974752427e-07,
+      "post-par-pipe-end": 0.0,
+      "post-partition-simplification": 0.0004670000053010881,
+      "replace-minimum-constant": 7.000000096013537e-06,
+      "reshape-mover": 3.000000106112566e-06,
+      "simplify-concat": 5.0999999075429514e-05,
+      "simplify-while-loops": 1.9999999949504854e-06,
+      "transform-variadic-reduce": 9.000000318337698e-06,
+      "tuple-simplifier": 4.999999873689376e-06,
+      "unpack-nested-aws-ntwsr": 3.999999989900971e-06,
+      "unroll-while-loop": 0.0
+    },
+    "hilo": {
+      "ArithmeticIntensity": 1360.724365234375,
+      "HloMacCount": 214748364800.0,
+      "Traffic": 315638304.0
+    }
+  },
+  "sg02": {
+    "compiletime": {
+      "CanonicalizeConv": 7.699999696342275e-05,
+      "CanonicalizeForTensorizer": 1.5999999959603883e-05,
+      "Canonicalizer": 0.00037900000461377203,
+      "HoistCompute": 0.0,
+      "IdentifyCrossPassTensors": 1.2000000424450263e-05,
+      "MemcastMotion": 0.0,
+      "PenguinizeFunctions": 1.1000000085914508e-05,
+      "PruneFunctions": 7.999999979801942e-06,
+      "RemoveOptimizationBarriers": 1.2000000424450263e-05,
+      "ScatterMotion": 1.1000000085914508e-05,
+      "TensorizerLegalizationPass": 9.000000318337698e-06,
+      "VerifySupportedOps": 1.700000029813964e-05,
+      "algsimp": 8.70000003487803e-05,
+      "batchnorm_expander": 1.700000029813964e-05,
+      "boundary-marker-removal": 6.000000212225132e-06,
+      "call-inliner": 1.2999999853491317e-05,
+      "canonicalize-boundary-marker": 7.999999979801942e-06,
+      "collective-stream-id-checker": 3.000000106112566e-06,
+      "comparison-expander": 7.999999979801942e-06,
+      "computation-deduplicator": 3.199999991920777e-05,
+      "config-lowering": 3.400000059627928e-05,
+      "constant_folding": 1.1000000085914508e-05,
+      "cse": 1.700000029813964e-05,
+      "dce": 9.999999974752427e-07,
+      "dynamic-slice-transpose": 4.999999873689376e-06,
+      "eliminate-redundant-compare": 6.000000212225132e-06,
+      "emit-offloaded-dropout": 1.5999999959603883e-05,
+      "flatten-call-graph": 1.2000000424450263e-05,
+      "fuse-send-recv": 2.700000004551839e-05,
+      "hilo-conditional-to-select": 7.000000096013537e-06,
+      "hilo::LegalizeAlias": 4.999999873689376e-06,
+      "hilo::NeuronInstCombine": 3.9999998989515007e-05,
+      "hilo::NeuronOpFusion": 0.0,
+      "hilo::ReplaceTokenTypeWithU8Pass": 9.000000318337698e-06,
+      "hilo::ScheduleFusion": 1.9999999949504854e-06,
+      "hilo::SixtyFourHack": 1.2000000424450263e-05,
+      "hilo::VerifyAliasing": 1.9999999949504854e-06,
+      "hlo-mac-count": 3.199999991920777e-05,
+      "legalize-ccops-for-tensorizer": 9.999999974752427e-07,
+      "legalize-compare": 4.999999873689376e-06,
+      "lower-argminmax-custom-call": 4.999999873689376e-06,
+      "map-inline": 1.5999999959603883e-05,
+      "metadata-naming": 2.300000051036477e-05,
+      "mlir::detail::OpToOpPassAdaptor": 1.8000000636675395e-05,
+      "mlir::hlo::MhloToPyPenguin": 0.0014220000011846423,
+      "mlir::mhlo::LowerComplexExtraPass": 0.00012799999967683107,
+      "mlir::mhlo::LowerComplexPass": 0.00019500000053085387,
+      "native-to-custom-softmax": 9.000000318337698e-06,
+      "native-to-custom-softmax-dx": 1.8999999156221747e-05,
+      "neuron-hlo-verifier": 0.000534999999217689,
+      "operand_upcaster": 1.700000029813964e-05,
+      "post-par-pipe-begin": 1.9999999949504854e-06,
+      "post-par-pipe-end": 0.0,
+      "post-partition-simplification": 0.0006779999821446836,
+      "replace-minimum-constant": 9.999999747378752e-06,
+      "reshape-mover": 3.999999989900971e-06,
+      "simplify-concat": 9.40000027185306e-05,
+      "simplify-while-loops": 3.000000106112566e-06,
+      "transform-variadic-reduce": 5.2999999752501026e-05,
+      "tuple-simplifier": 7.000000096013537e-06,
+      "unpack-nested-aws-ntwsr": 6.000000212225132e-06,
+      "unroll-while-loop": 0.0
+    },
+    "hilo": {
+      "ArithmeticIntensity": 688.8796997070313,
+      "HloMacCount": 369678352384.0,
+      "Traffic": 1073274048.0
+    }
+  }
+}

context_encoding_model/_tp0_bk0/graph.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2f5ea2ec513d4c246461496be1df6a5408a8b32e0e3d5e8c000252b38c4eb0
+size 4557824

context_encoding_model/_tp0_bk0/log-neuron-cc.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

context_encoding_model/_tp0_bk0/metaneff.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:979a3ce4ea645eb08474f1ced61267e507be927f3b1d9d6e3b8e280bf5249638
+size 804702

context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a1a7494d891cfc6c1e0596bf5b2088f381187ae6f1ab618b3f2610e62f323e
+size 889460

context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2f5ea2ec513d4c246461496be1df6a5408a8b32e0e3d5e8c000252b38c4eb0
+size 4557824

layout_opt/command.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ neuronx-cc compile graph.hlo --framework XLA --target trn1 --output graph.neff --model-type=transformer -O1 --lnc=1 '--internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=true' --logfile=log-neuron-cc.txt --verbose=35

layout_opt/graph.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ee6e68f9bde84f2d463d428631187d977b7c48d6aab9ea708b3ee5b5fcd5d4c
+size 7343104

layout_opt/log-neuron-cc.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

layout_opt/metaneff ADDED Viewed

	@@ -0,0 +1,982 @@

+(
+input0��	�2embed_tokens.weight8
+;
+input1� �2'layers.0.self_attn.o_proj.o_proj.weight8
+;
+input2�� 2'layers.0.self_attn.qkv_proj.Wqkv.weight8
+1
+input3� 2layers.0.input_layernorm.weight8
+7
+input4�2%layers.0.self_attn.k_layernorm.weight8
+7
+input5�2%layers.0.self_attn.q_layernorm.weight8
+1
+input6� �02layers.0.mlp.down_proj.weight8
+/
+input7�0� 2layers.0.mlp.up_proj.weight8
+:
+input8� 2(layers.0.post_attention_layernorm.weight8
+1
+input9�0� 2layers.0.mlp.gate_proj.weight8
+<
+input10� �2'layers.1.self_attn.o_proj.o_proj.weight8
+<
+input11�� 2'layers.1.self_attn.qkv_proj.Wqkv.weight8
+2
+input12� 2layers.1.input_layernorm.weight8
+8
+input13�2%layers.1.self_attn.k_layernorm.weight8
+8
+input14�2%layers.1.self_attn.q_layernorm.weight8
+2
+input15� �02layers.1.mlp.down_proj.weight8
+0
+input16�0� 2layers.1.mlp.up_proj.weight8
+;
+input17� 2(layers.1.post_attention_layernorm.weight8
+2
+input18�0� 2layers.1.mlp.gate_proj.weight8
+<
+input19� �2'layers.2.self_attn.o_proj.o_proj.weight8
+<
+input20�� 2'layers.2.self_attn.qkv_proj.Wqkv.weight8
+2
+input21� 2layers.2.input_layernorm.weight8
+8
+input22�2%layers.2.self_attn.k_layernorm.weight8
+8
+input23�2%layers.2.self_attn.q_layernorm.weight8
+2
+input24� �02layers.2.mlp.down_proj.weight8
+0
+input25�0� 2layers.2.mlp.up_proj.weight8
+;
+input26� 2(layers.2.post_attention_layernorm.weight8
+2
+input27�0� 2layers.2.mlp.gate_proj.weight8
+<
+input28� �2'layers.3.self_attn.o_proj.o_proj.weight8
+<
+input29�� 2'layers.3.self_attn.qkv_proj.Wqkv.weight8
+2
+input30� 2layers.3.input_layernorm.weight8
+8
+input31�2%layers.3.self_attn.k_layernorm.weight8
+8
+input32�2%layers.3.self_attn.q_layernorm.weight8
+2
+input33� �02layers.3.mlp.down_proj.weight8
+0
+input34�0� 2layers.3.mlp.up_proj.weight8
+;
+input35� 2(layers.3.post_attention_layernorm.weight8
+2
+input36�0� 2layers.3.mlp.gate_proj.weight8
+<
+input37� �2'layers.4.self_attn.o_proj.o_proj.weight8
+<
+input38�� 2'layers.4.self_attn.qkv_proj.Wqkv.weight8
+2
+input39� 2layers.4.input_layernorm.weight8
+8
+input40�2%layers.4.self_attn.k_layernorm.weight8
+8
+input41�2%layers.4.self_attn.q_layernorm.weight8
+2
+input42� �02layers.4.mlp.down_proj.weight8
+0
+input43�0� 2layers.4.mlp.up_proj.weight8
+;
+input44� 2(layers.4.post_attention_layernorm.weight8
+2
+input45�0� 2layers.4.mlp.gate_proj.weight8
+<
+input46� �2'layers.5.self_attn.o_proj.o_proj.weight8
+<
+input47�� 2'layers.5.self_attn.qkv_proj.Wqkv.weight8
+2
+input48� 2layers.5.input_layernorm.weight8
+8
+input49�2%layers.5.self_attn.k_layernorm.weight8
+8
+input50�2%layers.5.self_attn.q_layernorm.weight8
+2
+input51� �02layers.5.mlp.down_proj.weight8
+0
+input52�0� 2layers.5.mlp.up_proj.weight8
+;
+input53� 2(layers.5.post_attention_layernorm.weight8
+2
+input54�0� 2layers.5.mlp.gate_proj.weight8
+<
+input55� �2'layers.6.self_attn.o_proj.o_proj.weight8
+<
+input56�� 2'layers.6.self_attn.qkv_proj.Wqkv.weight8
+2
+input57� 2layers.6.input_layernorm.weight8
+8
+input58�2%layers.6.self_attn.k_layernorm.weight8
+8
+input59�2%layers.6.self_attn.q_layernorm.weight8
+2
+input60� �02layers.6.mlp.down_proj.weight8
+0
+input61�0� 2layers.6.mlp.up_proj.weight8
+;
+input62� 2(layers.6.post_attention_layernorm.weight8
+2
+input63�0� 2layers.6.mlp.gate_proj.weight8
+<
+input64� �2'layers.7.self_attn.o_proj.o_proj.weight8
+<
+input65�� 2'layers.7.self_attn.qkv_proj.Wqkv.weight8
+2
+input66� 2layers.7.input_layernorm.weight8
+8
+input67�2%layers.7.self_attn.k_layernorm.weight8
+8
+input68�2%layers.7.self_attn.q_layernorm.weight8
+2
+input69� �02layers.7.mlp.down_proj.weight8
+0
+input70�0� 2layers.7.mlp.up_proj.weight8
+;
+input71� 2(layers.7.post_attention_layernorm.weight8
+2
+input72�0� 2layers.7.mlp.gate_proj.weight8
+<
+input73� �2'layers.8.self_attn.o_proj.o_proj.weight8
+<
+input74�� 2'layers.8.self_attn.qkv_proj.Wqkv.weight8
+2
+input75� 2layers.8.input_layernorm.weight8
+8
+input76�2%layers.8.self_attn.k_layernorm.weight8
+8
+input77�2%layers.8.self_attn.q_layernorm.weight8
+2
+input78� �02layers.8.mlp.down_proj.weight8
+0
+input79�0� 2layers.8.mlp.up_proj.weight8
+;
+input80� 2(layers.8.post_attention_layernorm.weight8
+2
+input81�0� 2layers.8.mlp.gate_proj.weight8
+<
+input82� �2'layers.9.self_attn.o_proj.o_proj.weight8
+<
+input83�� 2'layers.9.self_attn.qkv_proj.Wqkv.weight8
+2
+input84� 2layers.9.input_layernorm.weight8
+8
+input85�2%layers.9.self_attn.k_layernorm.weight8
+8
+input86�2%layers.9.self_attn.q_layernorm.weight8
+2
+input87� �02layers.9.mlp.down_proj.weight8
+0
+input88�0� 2layers.9.mlp.up_proj.weight8
+;
+input89� 2(layers.9.post_attention_layernorm.weight8
+2
+input90�0� 2layers.9.mlp.gate_proj.weight8
+=
+input91� �2(layers.10.self_attn.o_proj.o_proj.weight8
+=
+input92�� 2(layers.10.self_attn.qkv_proj.Wqkv.weight8
+3
+input93� 2 layers.10.input_layernorm.weight8
+9
+input94�2&layers.10.self_attn.k_layernorm.weight8
+9
+input95�2&layers.10.self_attn.q_layernorm.weight8
+3
+input96� �02layers.10.mlp.down_proj.weight8
+1
+input97�0� 2layers.10.mlp.up_proj.weight8
+<
+input98� 2)layers.10.post_attention_layernorm.weight8
+3
+input99�0� 2layers.10.mlp.gate_proj.weight8
+>
+input100� �2(layers.11.self_attn.o_proj.o_proj.weight8
+>
+input101�� 2(layers.11.self_attn.qkv_proj.Wqkv.weight8
+4
+input102� 2 layers.11.input_layernorm.weight8
+:
+input103�2&layers.11.self_attn.k_layernorm.weight8
+:
+input104�2&layers.11.self_attn.q_layernorm.weight8
+4
+input105� �02layers.11.mlp.down_proj.weight8
+2
+input106�0� 2layers.11.mlp.up_proj.weight8
+=
+input107� 2)layers.11.post_attention_layernorm.weight8
+4
+input108�0� 2layers.11.mlp.gate_proj.weight8
+>
+input109� �2(layers.12.self_attn.o_proj.o_proj.weight8
+>
+input110�� 2(layers.12.self_attn.qkv_proj.Wqkv.weight8
+4
+input111� 2 layers.12.input_layernorm.weight8
+:
+input112�2&layers.12.self_attn.k_layernorm.weight8
+:
+input113�2&layers.12.self_attn.q_layernorm.weight8
+4
+input114� �02layers.12.mlp.down_proj.weight8
+2
+input115�0� 2layers.12.mlp.up_proj.weight8
+=
+input116� 2)layers.12.post_attention_layernorm.weight8
+4
+input117�0� 2layers.12.mlp.gate_proj.weight8
+>
+input118� �2(layers.13.self_attn.o_proj.o_proj.weight8
+>
+input119�� 2(layers.13.self_attn.qkv_proj.Wqkv.weight8
+4
+input120� 2 layers.13.input_layernorm.weight8
+:
+input121�2&layers.13.self_attn.k_layernorm.weight8
+:
+input122�2&layers.13.self_attn.q_layernorm.weight8
+4
+input123� �02layers.13.mlp.down_proj.weight8
+2
+input124�0� 2layers.13.mlp.up_proj.weight8
+=
+input125� 2)layers.13.post_attention_layernorm.weight8
+4
+input126�0� 2layers.13.mlp.gate_proj.weight8
+>
+input127� �2(layers.14.self_attn.o_proj.o_proj.weight8
+>
+input128�� 2(layers.14.self_attn.qkv_proj.Wqkv.weight8
+4
+input129� 2 layers.14.input_layernorm.weight8
+:
+input130�2&layers.14.self_attn.k_layernorm.weight8
+:
+input131�2&layers.14.self_attn.q_layernorm.weight8
+4
+input132� �02layers.14.mlp.down_proj.weight8
+2
+input133�0� 2layers.14.mlp.up_proj.weight8
+=
+input134� 2)layers.14.post_attention_layernorm.weight8
+4
+input135�0� 2layers.14.mlp.gate_proj.weight8
+>
+input136� �2(layers.15.self_attn.o_proj.o_proj.weight8
+>
+input137�� 2(layers.15.self_attn.qkv_proj.Wqkv.weight8
+4
+input138� 2 layers.15.input_layernorm.weight8
+:
+input139�2&layers.15.self_attn.k_layernorm.weight8
+:
+input140�2&layers.15.self_attn.q_layernorm.weight8
+4
+input141� �02layers.15.mlp.down_proj.weight8
+2
+input142�0� 2layers.15.mlp.up_proj.weight8
+=
+input143� 2)layers.15.post_attention_layernorm.weight8
+4
+input144�0� 2layers.15.mlp.gate_proj.weight8
+>
+input145� �2(layers.16.self_attn.o_proj.o_proj.weight8
+>
+input146�� 2(layers.16.self_attn.qkv_proj.Wqkv.weight8
+4
+input147� 2 layers.16.input_layernorm.weight8
+:
+input148�2&layers.16.self_attn.k_layernorm.weight8
+:
+input149�2&layers.16.self_attn.q_layernorm.weight8
+4
+input150� �02layers.16.mlp.down_proj.weight8
+2
+input151�0� 2layers.16.mlp.up_proj.weight8
+=
+input152� 2)layers.16.post_attention_layernorm.weight8
+4
+input153�0� 2layers.16.mlp.gate_proj.weight8
+>
+input154� �2(layers.17.self_attn.o_proj.o_proj.weight8
+>
+input155�� 2(layers.17.self_attn.qkv_proj.Wqkv.weight8
+4
+input156� 2 layers.17.input_layernorm.weight8
+:
+input157�2&layers.17.self_attn.k_layernorm.weight8
+:
+input158�2&layers.17.self_attn.q_layernorm.weight8
+4
+input159� �02layers.17.mlp.down_proj.weight8
+2
+input160�0� 2layers.17.mlp.up_proj.weight8
+=
+input161� 2)layers.17.post_attention_layernorm.weight8
+4
+input162�0� 2layers.17.mlp.gate_proj.weight8
+>
+input163� �2(layers.18.self_attn.o_proj.o_proj.weight8
+>
+input164�� 2(layers.18.self_attn.qkv_proj.Wqkv.weight8
+4
+input165� 2 layers.18.input_layernorm.weight8
+:
+input166�2&layers.18.self_attn.k_layernorm.weight8
+:
+input167�2&layers.18.self_attn.q_layernorm.weight8
+4
+input168� �02layers.18.mlp.down_proj.weight8
+2
+input169�0� 2layers.18.mlp.up_proj.weight8
+=
+input170� 2)layers.18.post_attention_layernorm.weight8
+4
+input171�0� 2layers.18.mlp.gate_proj.weight8
+>
+input172� �2(layers.19.self_attn.o_proj.o_proj.weight8
+>
+input173�� 2(layers.19.self_attn.qkv_proj.Wqkv.weight8
+4
+input174� 2 layers.19.input_layernorm.weight8
+:
+input175�2&layers.19.self_attn.k_layernorm.weight8
+:
+input176�2&layers.19.self_attn.q_layernorm.weight8
+4
+input177� �02layers.19.mlp.down_proj.weight8
+2
+input178�0� 2layers.19.mlp.up_proj.weight8
+=
+input179� 2)layers.19.post_attention_layernorm.weight8
+4
+input180�0� 2layers.19.mlp.gate_proj.weight8
+>
+input181� �2(layers.20.self_attn.o_proj.o_proj.weight8
+>
+input182�� 2(layers.20.self_attn.qkv_proj.Wqkv.weight8
+4
+input183� 2 layers.20.input_layernorm.weight8
+:
+input184�2&layers.20.self_attn.k_layernorm.weight8
+:
+input185�2&layers.20.self_attn.q_layernorm.weight8
+4
+input186� �02layers.20.mlp.down_proj.weight8
+2
+input187�0� 2layers.20.mlp.up_proj.weight8
+=
+input188� 2)layers.20.post_attention_layernorm.weight8
+4
+input189�0� 2layers.20.mlp.gate_proj.weight8
+>
+input190� �2(layers.21.self_attn.o_proj.o_proj.weight8
+>
+input191�� 2(layers.21.self_attn.qkv_proj.Wqkv.weight8
+4
+input192� 2 layers.21.input_layernorm.weight8
+:
+input193�2&layers.21.self_attn.k_layernorm.weight8
+:
+input194�2&layers.21.self_attn.q_layernorm.weight8
+4
+input195� �02layers.21.mlp.down_proj.weight8
+2
+input196�0� 2layers.21.mlp.up_proj.weight8
+=
+input197� 2)layers.21.post_attention_layernorm.weight8
+4
+input198�0� 2layers.21.mlp.gate_proj.weight8
+>
+input199� �2(layers.22.self_attn.o_proj.o_proj.weight8
+>
+input200�� 2(layers.22.self_attn.qkv_proj.Wqkv.weight8
+4
+input201� 2 layers.22.input_layernorm.weight8
+:
+input202�2&layers.22.self_attn.k_layernorm.weight8
+:
+input203�2&layers.22.self_attn.q_layernorm.weight8
+4
+input204� �02layers.22.mlp.down_proj.weight8
+2
+input205�0� 2layers.22.mlp.up_proj.weight8
+=
+input206� 2)layers.22.post_attention_layernorm.weight8
+4
+input207�0� 2layers.22.mlp.gate_proj.weight8
+>
+input208� �2(layers.23.self_attn.o_proj.o_proj.weight8
+>
+input209�� 2(layers.23.self_attn.qkv_proj.Wqkv.weight8
+4
+input210� 2 layers.23.input_layernorm.weight8
+:
+input211�2&layers.23.self_attn.k_layernorm.weight8
+:
+input212�2&layers.23.self_attn.q_layernorm.weight8
+4
+input213� �02layers.23.mlp.down_proj.weight8
+2
+input214�0� 2layers.23.mlp.up_proj.weight8
+=
+input215� 2)layers.23.post_attention_layernorm.weight8
+4
+input216�0� 2layers.23.mlp.gate_proj.weight8
+>
+input217� �2(layers.24.self_attn.o_proj.o_proj.weight8
+>
+input218�� 2(layers.24.self_attn.qkv_proj.Wqkv.weight8
+4
+input219� 2 layers.24.input_layernorm.weight8
+:
+input220�2&layers.24.self_attn.k_layernorm.weight8
+:
+input221�2&layers.24.self_attn.q_layernorm.weight8
+4
+input222� �02layers.24.mlp.down_proj.weight8
+2
+input223�0� 2layers.24.mlp.up_proj.weight8
+=
+input224� 2)layers.24.post_attention_layernorm.weight8
+4
+input225�0� 2layers.24.mlp.gate_proj.weight8
+>
+input226� �2(layers.25.self_attn.o_proj.o_proj.weight8
+>
+input227�� 2(layers.25.self_attn.qkv_proj.Wqkv.weight8
+4
+input228� 2 layers.25.input_layernorm.weight8
+:
+input229�2&layers.25.self_attn.k_layernorm.weight8
+:
+input230�2&layers.25.self_attn.q_layernorm.weight8
+4
+input231� �02layers.25.mlp.down_proj.weight8
+2
+input232�0� 2layers.25.mlp.up_proj.weight8
+=
+input233� 2)layers.25.post_attention_layernorm.weight8
+4
+input234�0� 2layers.25.mlp.gate_proj.weight8
+>
+input235� �2(layers.26.self_attn.o_proj.o_proj.weight8
+>
+input236�� 2(layers.26.self_attn.qkv_proj.Wqkv.weight8
+4
+input237� 2 layers.26.input_layernorm.weight8
+:
+input238�2&layers.26.self_attn.k_layernorm.weight8
+:
+input239�2&layers.26.self_attn.q_layernorm.weight8
+4
+input240� �02layers.26.mlp.down_proj.weight8
+2
+input241�0� 2layers.26.mlp.up_proj.weight8
+=
+input242� 2)layers.26.post_attention_layernorm.weight8
+4
+input243�0� 2layers.26.mlp.gate_proj.weight8
+>
+input244� �2(layers.27.self_attn.o_proj.o_proj.weight8
+>
+input245�� 2(layers.27.self_attn.qkv_proj.Wqkv.weight8
+4
+input246� 2 layers.27.input_layernorm.weight8
+:
+input247�2&layers.27.self_attn.k_layernorm.weight8
+:
+input248�2&layers.27.self_attn.q_layernorm.weight8
+4
+input249� �02layers.27.mlp.down_proj.weight8
+2
+input250�0� 2layers.27.mlp.up_proj.weight8
+=
+input251� 2)layers.27.post_attention_layernorm.weight8
+4
+input252�0� 2layers.27.mlp.gate_proj.weight8
+>
+input253� �2(layers.28.self_attn.o_proj.o_proj.weight8
+>
+input254�� 2(layers.28.self_attn.qkv_proj.Wqkv.weight8
+4
+input255� 2 layers.28.input_layernorm.weight8
+:
+input256�2&layers.28.self_attn.k_layernorm.weight8
+:
+input257�2&layers.28.self_attn.q_layernorm.weight8
+4
+input258� �02layers.28.mlp.down_proj.weight8
+2
+input259�0� 2layers.28.mlp.up_proj.weight8
+=
+input260� 2)layers.28.post_attention_layernorm.weight8
+4
+input261�0� 2layers.28.mlp.gate_proj.weight8
+>
+input262� �2(layers.29.self_attn.o_proj.o_proj.weight8
+>
+input263�� 2(layers.29.self_attn.qkv_proj.Wqkv.weight8
+4
+input264� 2 layers.29.input_layernorm.weight8
+:
+input265�2&layers.29.self_attn.k_layernorm.weight8
+:
+input266�2&layers.29.self_attn.q_layernorm.weight8
+4
+input267� �02layers.29.mlp.down_proj.weight8
+2
+input268�0� 2layers.29.mlp.up_proj.weight8
+=
+input269� 2)layers.29.post_attention_layernorm.weight8
+4
+input270�0� 2layers.29.mlp.gate_proj.weight8
+>
+input271� �2(layers.30.self_attn.o_proj.o_proj.weight8
+>
+input272�� 2(layers.30.self_attn.qkv_proj.Wqkv.weight8
+4
+input273� 2 layers.30.input_layernorm.weight8
+:
+input274�2&layers.30.self_attn.k_layernorm.weight8
+:
+input275�2&layers.30.self_attn.q_layernorm.weight8
+4
+input276� �02layers.30.mlp.down_proj.weight8
+2
+input277�0� 2layers.30.mlp.up_proj.weight8
+=
+input278� 2)layers.30.post_attention_layernorm.weight8
+4
+input279�0� 2layers.30.mlp.gate_proj.weight8
+>
+input280� �2(layers.31.self_attn.o_proj.o_proj.weight8
+>
+input281�� 2(layers.31.self_attn.qkv_proj.Wqkv.weight8
+4
+input282� 2 layers.31.input_layernorm.weight8
+:
+input283�2&layers.31.self_attn.k_layernorm.weight8
+:
+input284�2&layers.31.self_attn.q_layernorm.weight8
+4
+input285� �02layers.31.mlp.down_proj.weight8
+2
+input286�0� 2layers.31.mlp.up_proj.weight8
+=
+input287� 2)layers.31.post_attention_layernorm.weight8
+4
+input288�0� 2layers.31.mlp.gate_proj.weight8
+>
+input289� �2(layers.32.self_attn.o_proj.o_proj.weight8
+>
+input290�� 2(layers.32.self_attn.qkv_proj.Wqkv.weight8
+4
+input291� 2 layers.32.input_layernorm.weight8
+:
+input292�2&layers.32.self_attn.k_layernorm.weight8
+:
+input293�2&layers.32.self_attn.q_layernorm.weight8
+4
+input294� �02layers.32.mlp.down_proj.weight8
+2
+input295�0� 2layers.32.mlp.up_proj.weight8
+=
+input296� 2)layers.32.post_attention_layernorm.weight8
+4
+input297�0� 2layers.32.mlp.gate_proj.weight8
+>
+input298� �2(layers.33.self_attn.o_proj.o_proj.weight8
+>
+input299�� 2(layers.33.self_attn.qkv_proj.Wqkv.weight8
+4
+input300� 2 layers.33.input_layernorm.weight8
+:
+input301�2&layers.33.self_attn.k_layernorm.weight8
+:
+input302�2&layers.33.self_attn.q_layernorm.weight8
+4
+input303� �02layers.33.mlp.down_proj.weight8
+2
+input304�0� 2layers.33.mlp.up_proj.weight8
+=
+input305� 2)layers.33.post_attention_layernorm.weight8
+4
+input306�0� 2layers.33.mlp.gate_proj.weight8
+>
+input307� �2(layers.34.self_attn.o_proj.o_proj.weight8
+>
+input308�� 2(layers.34.self_attn.qkv_proj.Wqkv.weight8
+4
+input309� 2 layers.34.input_layernorm.weight8
+:
+input310�2&layers.34.self_attn.k_layernorm.weight8
+:
+input311�2&layers.34.self_attn.q_layernorm.weight8
+4
+input312� �02layers.34.mlp.down_proj.weight8
+2
+input313�0� 2layers.34.mlp.up_proj.weight8
+=
+input314� 2)layers.34.post_attention_layernorm.weight8
+4
+input315�0� 2layers.34.mlp.gate_proj.weight8
+>
+input316� �2(layers.35.self_attn.o_proj.o_proj.weight8
+>
+input317�� 2(layers.35.self_attn.qkv_proj.Wqkv.weight8
+4
+input318� 2 layers.35.input_layernorm.weight8
+:
+input319�2&layers.35.self_attn.k_layernorm.weight8
+:
+input320�2&layers.35.self_attn.q_layernorm.weight8
+4
+input321� �02layers.35.mlp.down_proj.weight8
+2
+input322�0� 2layers.35.mlp.up_proj.weight8
+=
+input323� 2)layers.35.post_attention_layernorm.weight8
+4
+input324�0� 2layers.35.mlp.gate_proj.weight8
+%
+input325��� 2lm_head.weight8
+input326� 2norm.weight8'
+output0��	�2embed_tokens.weight:
+output1� �2'layers.0.self_attn.o_proj.o_proj.weight:
+output2�� 2'layers.0.self_attn.qkv_proj.Wqkv.weight0
+output3� 2layers.0.input_layernorm.weight6
+output4�2%layers.0.self_attn.k_layernorm.weight6
+output5�2%layers.0.self_attn.q_layernorm.weight0
+output6� �02layers.0.mlp.down_proj.weight.
+output7�0� 2layers.0.mlp.up_proj.weight9
+output8� 2(layers.0.post_attention_layernorm.weight0
+output9�0� 2layers.0.mlp.gate_proj.weight;
+output10� �2'layers.1.self_attn.o_proj.o_proj.weight;
+output11�� 2'layers.1.self_attn.qkv_proj.Wqkv.weight1
+output12� 2layers.1.input_layernorm.weight7
+output13�2%layers.1.self_attn.k_layernorm.weight7
+output14�2%layers.1.self_attn.q_layernorm.weight1
+output15� �02layers.1.mlp.down_proj.weight/
+output16�0� 2layers.1.mlp.up_proj.weight:
+output17� 2(layers.1.post_attention_layernorm.weight1
+output18�0� 2layers.1.mlp.gate_proj.weight;
+output19� �2'layers.2.self_attn.o_proj.o_proj.weight;
+output20�� 2'layers.2.self_attn.qkv_proj.Wqkv.weight1
+output21� 2layers.2.input_layernorm.weight7
+output22�2%layers.2.self_attn.k_layernorm.weight7
+output23�2%layers.2.self_attn.q_layernorm.weight1
+output24� �02layers.2.mlp.down_proj.weight/
+output25�0� 2layers.2.mlp.up_proj.weight:
+output26� 2(layers.2.post_attention_layernorm.weight1
+output27�0� 2layers.2.mlp.gate_proj.weight;
+output28� �2'layers.3.self_attn.o_proj.o_proj.weight;
+output29�� 2'layers.3.self_attn.qkv_proj.Wqkv.weight1
+output30� 2layers.3.input_layernorm.weight7
+output31�2%layers.3.self_attn.k_layernorm.weight7
+output32�2%layers.3.self_attn.q_layernorm.weight1
+output33� �02layers.3.mlp.down_proj.weight/
+output34�0� 2layers.3.mlp.up_proj.weight:
+output35� 2(layers.3.post_attention_layernorm.weight1
+output36�0� 2layers.3.mlp.gate_proj.weight;
+output37� �2'layers.4.self_attn.o_proj.o_proj.weight;
+output38�� 2'layers.4.self_attn.qkv_proj.Wqkv.weight1
+output39� 2layers.4.input_layernorm.weight7
+output40�2%layers.4.self_attn.k_layernorm.weight7
+output41�2%layers.4.self_attn.q_layernorm.weight1
+output42� �02layers.4.mlp.down_proj.weight/
+output43�0� 2layers.4.mlp.up_proj.weight:
+output44� 2(layers.4.post_attention_layernorm.weight1
+output45�0� 2layers.4.mlp.gate_proj.weight;
+output46� �2'layers.5.self_attn.o_proj.o_proj.weight;
+output47�� 2'layers.5.self_attn.qkv_proj.Wqkv.weight1
+output48� 2layers.5.input_layernorm.weight7
+output49�2%layers.5.self_attn.k_layernorm.weight7
+output50�2%layers.5.self_attn.q_layernorm.weight1
+output51� �02layers.5.mlp.down_proj.weight/
+output52�0� 2layers.5.mlp.up_proj.weight:
+output53� 2(layers.5.post_attention_layernorm.weight1
+output54�0� 2layers.5.mlp.gate_proj.weight;
+output55� �2'layers.6.self_attn.o_proj.o_proj.weight;
+output56�� 2'layers.6.self_attn.qkv_proj.Wqkv.weight1
+output57� 2layers.6.input_layernorm.weight7
+output58�2%layers.6.self_attn.k_layernorm.weight7
+output59�2%layers.6.self_attn.q_layernorm.weight1
+output60� �02layers.6.mlp.down_proj.weight/
+output61�0� 2layers.6.mlp.up_proj.weight:
+output62� 2(layers.6.post_attention_layernorm.weight1
+output63�0� 2layers.6.mlp.gate_proj.weight;
+output64� �2'layers.7.self_attn.o_proj.o_proj.weight;
+output65�� 2'layers.7.self_attn.qkv_proj.Wqkv.weight1
+output66� 2layers.7.input_layernorm.weight7
+output67�2%layers.7.self_attn.k_layernorm.weight7
+output68�2%layers.7.self_attn.q_layernorm.weight1
+output69� �02layers.7.mlp.down_proj.weight/
+output70�0� 2layers.7.mlp.up_proj.weight:
+output71� 2(layers.7.post_attention_layernorm.weight1
+output72�0� 2layers.7.mlp.gate_proj.weight;
+output73� �2'layers.8.self_attn.o_proj.o_proj.weight;
+output74�� 2'layers.8.self_attn.qkv_proj.Wqkv.weight1
+output75� 2layers.8.input_layernorm.weight7
+output76�2%layers.8.self_attn.k_layernorm.weight7
+output77�2%layers.8.self_attn.q_layernorm.weight1
+output78� �02layers.8.mlp.down_proj.weight/
+output79�0� 2layers.8.mlp.up_proj.weight:
+output80� 2(layers.8.post_attention_layernorm.weight1
+output81�0� 2layers.8.mlp.gate_proj.weight;
+output82� �2'layers.9.self_attn.o_proj.o_proj.weight;
+output83�� 2'layers.9.self_attn.qkv_proj.Wqkv.weight1
+output84� 2layers.9.input_layernorm.weight7
+output85�2%layers.9.self_attn.k_layernorm.weight7
+output86�2%layers.9.self_attn.q_layernorm.weight1
+output87� �02layers.9.mlp.down_proj.weight/
+output88�0� 2layers.9.mlp.up_proj.weight:
+output89� 2(layers.9.post_attention_layernorm.weight1
+output90�0� 2layers.9.mlp.gate_proj.weight<
+output91� �2(layers.10.self_attn.o_proj.o_proj.weight<
+output92�� 2(layers.10.self_attn.qkv_proj.Wqkv.weight2
+output93� 2 layers.10.input_layernorm.weight8
+output94�2&layers.10.self_attn.k_layernorm.weight8
+output95�2&layers.10.self_attn.q_layernorm.weight2
+output96� �02layers.10.mlp.down_proj.weight0
+output97�0� 2layers.10.mlp.up_proj.weight;
+output98� 2)layers.10.post_attention_layernorm.weight2
+output99�0� 2layers.10.mlp.gate_proj.weight=
+	output100� �2(layers.11.self_attn.o_proj.o_proj.weight=
+	output101�� 2(layers.11.self_attn.qkv_proj.Wqkv.weight3
+	output102� 2 layers.11.input_layernorm.weight9
+	output103�2&layers.11.self_attn.k_layernorm.weight9
+	output104�2&layers.11.self_attn.q_layernorm.weight3
+	output105� �02layers.11.mlp.down_proj.weight1
+	output106�0� 2layers.11.mlp.up_proj.weight<
+	output107� 2)layers.11.post_attention_layernorm.weight3
+	output108�0� 2layers.11.mlp.gate_proj.weight=
+	output109� �2(layers.12.self_attn.o_proj.o_proj.weight=
+	output110�� 2(layers.12.self_attn.qkv_proj.Wqkv.weight3
+	output111� 2 layers.12.input_layernorm.weight9
+	output112�2&layers.12.self_attn.k_layernorm.weight9
+	output113�2&layers.12.self_attn.q_layernorm.weight3
+	output114� �02layers.12.mlp.down_proj.weight1
+	output115�0� 2layers.12.mlp.up_proj.weight<
+	output116� 2)layers.12.post_attention_layernorm.weight3
+	output117�0� 2layers.12.mlp.gate_proj.weight=
+	output118� �2(layers.13.self_attn.o_proj.o_proj.weight=
+	output119�� 2(layers.13.self_attn.qkv_proj.Wqkv.weight3
+	output120� 2 layers.13.input_layernorm.weight9
+	output121�2&layers.13.self_attn.k_layernorm.weight9
+	output122�2&layers.13.self_attn.q_layernorm.weight3
+	output123� �02layers.13.mlp.down_proj.weight1
+	output124�0� 2layers.13.mlp.up_proj.weight<
+	output125� 2)layers.13.post_attention_layernorm.weight3
+	output126�0� 2layers.13.mlp.gate_proj.weight=
+	output127� �2(layers.14.self_attn.o_proj.o_proj.weight=
+	output128�� 2(layers.14.self_attn.qkv_proj.Wqkv.weight3
+	output129� 2 layers.14.input_layernorm.weight9
+	output130�2&layers.14.self_attn.k_layernorm.weight9
+	output131�2&layers.14.self_attn.q_layernorm.weight3
+	output132� �02layers.14.mlp.down_proj.weight1
+	output133�0� 2layers.14.mlp.up_proj.weight<
+	output134� 2)layers.14.post_attention_layernorm.weight3
+	output135�0� 2layers.14.mlp.gate_proj.weight=
+	output136� �2(layers.15.self_attn.o_proj.o_proj.weight=
+	output137�� 2(layers.15.self_attn.qkv_proj.Wqkv.weight3
+	output138� 2 layers.15.input_layernorm.weight9
+	output139�2&layers.15.self_attn.k_layernorm.weight9
+	output140�2&layers.15.self_attn.q_layernorm.weight3
+	output141� �02layers.15.mlp.down_proj.weight1
+	output142�0� 2layers.15.mlp.up_proj.weight<
+	output143� 2)layers.15.post_attention_layernorm.weight3
+	output144�0� 2layers.15.mlp.gate_proj.weight=
+	output145� �2(layers.16.self_attn.o_proj.o_proj.weight=
+	output146�� 2(layers.16.self_attn.qkv_proj.Wqkv.weight3
+	output147� 2 layers.16.input_layernorm.weight9
+	output148�2&layers.16.self_attn.k_layernorm.weight9
+	output149�2&layers.16.self_attn.q_layernorm.weight3
+	output150� �02layers.16.mlp.down_proj.weight1
+	output151�0� 2layers.16.mlp.up_proj.weight<
+	output152� 2)layers.16.post_attention_layernorm.weight3
+	output153�0� 2layers.16.mlp.gate_proj.weight=
+	output154� �2(layers.17.self_attn.o_proj.o_proj.weight=
+	output155�� 2(layers.17.self_attn.qkv_proj.Wqkv.weight3
+	output156� 2 layers.17.input_layernorm.weight9
+	output157�2&layers.17.self_attn.k_layernorm.weight9
+	output158�2&layers.17.self_attn.q_layernorm.weight3
+	output159� �02layers.17.mlp.down_proj.weight1
+	output160�0� 2layers.17.mlp.up_proj.weight<
+	output161� 2)layers.17.post_attention_layernorm.weight3
+	output162�0� 2layers.17.mlp.gate_proj.weight=
+	output163� �2(layers.18.self_attn.o_proj.o_proj.weight=
+	output164�� 2(layers.18.self_attn.qkv_proj.Wqkv.weight3
+	output165� 2 layers.18.input_layernorm.weight9
+	output166�2&layers.18.self_attn.k_layernorm.weight9
+	output167�2&layers.18.self_attn.q_layernorm.weight3
+	output168� �02layers.18.mlp.down_proj.weight1
+	output169�0� 2layers.18.mlp.up_proj.weight<
+	output170� 2)layers.18.post_attention_layernorm.weight3
+	output171�0� 2layers.18.mlp.gate_proj.weight=
+	output172� �2(layers.19.self_attn.o_proj.o_proj.weight=
+	output173�� 2(layers.19.self_attn.qkv_proj.Wqkv.weight3
+	output174� 2 layers.19.input_layernorm.weight9
+	output175�2&layers.19.self_attn.k_layernorm.weight9
+	output176�2&layers.19.self_attn.q_layernorm.weight3
+	output177� �02layers.19.mlp.down_proj.weight1
+	output178�0� 2layers.19.mlp.up_proj.weight<
+	output179� 2)layers.19.post_attention_layernorm.weight3
+	output180�0� 2layers.19.mlp.gate_proj.weight=
+	output181� �2(layers.20.self_attn.o_proj.o_proj.weight=
+	output182�� 2(layers.20.self_attn.qkv_proj.Wqkv.weight3
+	output183� 2 layers.20.input_layernorm.weight9
+	output184�2&layers.20.self_attn.k_layernorm.weight9
+	output185�2&layers.20.self_attn.q_layernorm.weight3
+	output186� �02layers.20.mlp.down_proj.weight1
+	output187�0� 2layers.20.mlp.up_proj.weight<
+	output188� 2)layers.20.post_attention_layernorm.weight3
+	output189�0� 2layers.20.mlp.gate_proj.weight=
+	output190� �2(layers.21.self_attn.o_proj.o_proj.weight=
+	output191�� 2(layers.21.self_attn.qkv_proj.Wqkv.weight3
+	output192� 2 layers.21.input_layernorm.weight9
+	output193�2&layers.21.self_attn.k_layernorm.weight9
+	output194�2&layers.21.self_attn.q_layernorm.weight3
+	output195� �02layers.21.mlp.down_proj.weight1
+	output196�0� 2layers.21.mlp.up_proj.weight<
+	output197� 2)layers.21.post_attention_layernorm.weight3
+	output198�0� 2layers.21.mlp.gate_proj.weight=
+	output199� �2(layers.22.self_attn.o_proj.o_proj.weight=
+	output200�� 2(layers.22.self_attn.qkv_proj.Wqkv.weight3
+	output201� 2 layers.22.input_layernorm.weight9
+	output202�2&layers.22.self_attn.k_layernorm.weight9
+	output203�2&layers.22.self_attn.q_layernorm.weight3
+	output204� �02layers.22.mlp.down_proj.weight1
+	output205�0� 2layers.22.mlp.up_proj.weight<
+	output206� 2)layers.22.post_attention_layernorm.weight3
+	output207�0� 2layers.22.mlp.gate_proj.weight=
+	output208� �2(layers.23.self_attn.o_proj.o_proj.weight=
+	output209�� 2(layers.23.self_attn.qkv_proj.Wqkv.weight3
+	output210� 2 layers.23.input_layernorm.weight9
+	output211�2&layers.23.self_attn.k_layernorm.weight9
+	output212�2&layers.23.self_attn.q_layernorm.weight3
+	output213� �02layers.23.mlp.down_proj.weight1
+	output214�0� 2layers.23.mlp.up_proj.weight<
+	output215� 2)layers.23.post_attention_layernorm.weight3
+	output216�0� 2layers.23.mlp.gate_proj.weight=
+	output217� �2(layers.24.self_attn.o_proj.o_proj.weight=
+	output218�� 2(layers.24.self_attn.qkv_proj.Wqkv.weight3
+	output219� 2 layers.24.input_layernorm.weight9
+	output220�2&layers.24.self_attn.k_layernorm.weight9
+	output221�2&layers.24.self_attn.q_layernorm.weight3
+	output222� �02layers.24.mlp.down_proj.weight1
+	output223�0� 2layers.24.mlp.up_proj.weight<
+	output224� 2)layers.24.post_attention_layernorm.weight3
+	output225�0� 2layers.24.mlp.gate_proj.weight=
+	output226� �2(layers.25.self_attn.o_proj.o_proj.weight=
+	output227�� 2(layers.25.self_attn.qkv_proj.Wqkv.weight3
+	output228� 2 layers.25.input_layernorm.weight9
+	output229�2&layers.25.self_attn.k_layernorm.weight9
+	output230�2&layers.25.self_attn.q_layernorm.weight3
+	output231� �02layers.25.mlp.down_proj.weight1
+	output232�0� 2layers.25.mlp.up_proj.weight<
+	output233� 2)layers.25.post_attention_layernorm.weight3
+	output234�0� 2layers.25.mlp.gate_proj.weight=
+	output235� �2(layers.26.self_attn.o_proj.o_proj.weight=
+	output236�� 2(layers.26.self_attn.qkv_proj.Wqkv.weight3
+	output237� 2 layers.26.input_layernorm.weight9
+	output238�2&layers.26.self_attn.k_layernorm.weight9
+	output239�2&layers.26.self_attn.q_layernorm.weight3
+	output240� �02layers.26.mlp.down_proj.weight1
+	output241�0� 2layers.26.mlp.up_proj.weight<
+	output242� 2)layers.26.post_attention_layernorm.weight3
+	output243�0� 2layers.26.mlp.gate_proj.weight=
+	output244� �2(layers.27.self_attn.o_proj.o_proj.weight=
+	output245�� 2(layers.27.self_attn.qkv_proj.Wqkv.weight3
+	output246� 2 layers.27.input_layernorm.weight9
+	output247�2&layers.27.self_attn.k_layernorm.weight9
+	output248�2&layers.27.self_attn.q_layernorm.weight3
+	output249� �02layers.27.mlp.down_proj.weight1
+	output250�0� 2layers.27.mlp.up_proj.weight<
+	output251� 2)layers.27.post_attention_layernorm.weight3
+	output252�0� 2layers.27.mlp.gate_proj.weight=
+	output253� �2(layers.28.self_attn.o_proj.o_proj.weight=
+	output254�� 2(layers.28.self_attn.qkv_proj.Wqkv.weight3
+	output255� 2 layers.28.input_layernorm.weight9
+	output256�2&layers.28.self_attn.k_layernorm.weight9
+	output257�2&layers.28.self_attn.q_layernorm.weight3
+	output258� �02layers.28.mlp.down_proj.weight1
+	output259�0� 2layers.28.mlp.up_proj.weight<
+	output260� 2)layers.28.post_attention_layernorm.weight3
+	output261�0� 2layers.28.mlp.gate_proj.weight=
+	output262� �2(layers.29.self_attn.o_proj.o_proj.weight=
+	output263�� 2(layers.29.self_attn.qkv_proj.Wqkv.weight3
+	output264� 2 layers.29.input_layernorm.weight9
+	output265�2&layers.29.self_attn.k_layernorm.weight9
+	output266�2&layers.29.self_attn.q_layernorm.weight3
+	output267� �02layers.29.mlp.down_proj.weight1
+	output268�0� 2layers.29.mlp.up_proj.weight<
+	output269� 2)layers.29.post_attention_layernorm.weight3
+	output270�0� 2layers.29.mlp.gate_proj.weight=
+	output271� �2(layers.30.self_attn.o_proj.o_proj.weight=
+	output272�� 2(layers.30.self_attn.qkv_proj.Wqkv.weight3
+	output273� 2 layers.30.input_layernorm.weight9
+	output274�2&layers.30.self_attn.k_layernorm.weight9
+	output275�2&layers.30.self_attn.q_layernorm.weight3
+	output276� �02layers.30.mlp.down_proj.weight1
+	output277�0� 2layers.30.mlp.up_proj.weight<
+	output278� 2)layers.30.post_attention_layernorm.weight3
+	output279�0� 2layers.30.mlp.gate_proj.weight=
+	output280� �2(layers.31.self_attn.o_proj.o_proj.weight=
+	output281�� 2(layers.31.self_attn.qkv_proj.Wqkv.weight3
+	output282� 2 layers.31.input_layernorm.weight9
+	output283�2&layers.31.self_attn.k_layernorm.weight9
+	output284�2&layers.31.self_attn.q_layernorm.weight3
+	output285� �02layers.31.mlp.down_proj.weight1
+	output286�0� 2layers.31.mlp.up_proj.weight<
+	output287� 2)layers.31.post_attention_layernorm.weight3
+	output288�0� 2layers.31.mlp.gate_proj.weight=
+	output289� �2(layers.32.self_attn.o_proj.o_proj.weight=
+	output290�� 2(layers.32.self_attn.qkv_proj.Wqkv.weight3
+	output291� 2 layers.32.input_layernorm.weight9
+	output292�2&layers.32.self_attn.k_layernorm.weight9
+	output293�2&layers.32.self_attn.q_layernorm.weight3
+	output294� �02layers.32.mlp.down_proj.weight1
+	output295�0� 2layers.32.mlp.up_proj.weight<
+	output296� 2)layers.32.post_attention_layernorm.weight3
+	output297�0� 2layers.32.mlp.gate_proj.weight=
+	output298� �2(layers.33.self_attn.o_proj.o_proj.weight=
+	output299�� 2(layers.33.self_attn.qkv_proj.Wqkv.weight3
+	output300� 2 layers.33.input_layernorm.weight9
+	output301�2&layers.33.self_attn.k_layernorm.weight9
+	output302�2&layers.33.self_attn.q_layernorm.weight3
+	output303� �02layers.33.mlp.down_proj.weight1
+	output304�0� 2layers.33.mlp.up_proj.weight<
+	output305� 2)layers.33.post_attention_layernorm.weight3
+	output306�0� 2layers.33.mlp.gate_proj.weight=
+	output307� �2(layers.34.self_attn.o_proj.o_proj.weight=
+	output308�� 2(layers.34.self_attn.qkv_proj.Wqkv.weight3
+	output309� 2 layers.34.input_layernorm.weight9
+	output310�2&layers.34.self_attn.k_layernorm.weight9
+	output311�2&layers.34.self_attn.q_layernorm.weight3
+	output312� �02layers.34.mlp.down_proj.weight1
+	output313�0� 2layers.34.mlp.up_proj.weight<
+	output314� 2)layers.34.post_attention_layernorm.weight3
+	output315�0� 2layers.34.mlp.gate_proj.weight=
+	output316� �2(layers.35.self_attn.o_proj.o_proj.weight=
+	output317�� 2(layers.35.self_attn.qkv_proj.Wqkv.weight3
+	output318� 2 layers.35.input_layernorm.weight9
+	output319�2&layers.35.self_attn.k_layernorm.weight9
+	output320�2&layers.35.self_attn.q_layernorm.weight3
+	output321� �02layers.35.mlp.down_proj.weight1
+	output322�0� 2layers.35.mlp.up_proj.weight<
+	output323� 2)layers.35.post_attention_layernorm.weight3
+	output324�0� 2layers.35.mlp.gate_proj.weight$
+	output325��� 2lm_head.weight
+	output326� 2norm.weight

layout_opt/model/graph.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:faa8f2dc49f4606210aa2baec2b92796320bf4e8f2f13139e7db28860aa0ad17
+size 173259

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:464989b5c79dac0618dd8b9d1c58df8196ec48f89f913ca9ad1e530e04edff5f
+size 17614391015

neuron_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_serialized_key": "NxDNeuronConfig",
+  "async_mode": false,
+  "attn_kernel_enabled": false,
+  "batch_size": 4,
+  "capacity_factor": null,
+  "cc_pipeline_tiling_factor": 2,
+  "checkpoint_id": "karanps/ChessLM_Qwen3",
+  "checkpoint_revision": "e0d57507d96b2be2dd0dc901ecb231dec2dd6330",
+  "continuous_batching": true,
+  "enable_bucketing": false,
+  "ep_degree": 1,
+  "flash_decoding_enabled": false,
+  "fused_qkv": true,
+  "glu_mlp": true,
+  "is_chunked_prefill": false,
+  "local_ranks_size": 2,
+  "logical_nc_config": 1,
+  "max_batch_size": 4,
+  "max_context_length": 2048,
+  "max_topk": 256,
+  "mlp_kernel_enabled": false,
+  "mlp_kernel_fuse_residual_add": false,
+  "n_active_tokens": 2048,
+  "neuronxcc_version": "2.21.33363.0+82129205",
+  "num_cores_per_group": 1,
+  "on_device_sampling": false,
+  "optimum_neuron_version": "0.3.0",
+  "output_logits": false,
+  "padding_side": "right",
+  "pp_degree": 1,
+  "qk_layernorm": false,
+  "qkv_kernel_enabled": false,
+  "rpl_reduce_dtype": "bfloat16",
+  "sequence_length": 2048,
+  "sequence_parallel_enabled": false,
+  "speculation_length": 0,
+  "start_rank_id": 0,
+  "target": null,
+  "torch_dtype": "bfloat16",
+  "tp_degree": 2,
+  "vocab_parallel": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

token_generation_model/_tp0_bk0/command.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ neuronx-cc compile --framework=XLA model.MODULE_8f245c7816a398e13e79+a9d440f5.hlo_module.pb --output model.MODULE_8f245c7816a398e13e79+a9d440f5.neff --target=trn1 --auto-cast=none --model-type=transformer '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ' -O2 --lnc=1 --logfile=log-neuron-cc.txt --enable-internal-neff-wrapper --verbose=35

token_generation_model/_tp0_bk0/compile_flags.MODULE_8f245c7816a398e13e79+a9d440f5.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

token_generation_model/_tp0_bk0/global_metric_store.json ADDED Viewed

	@@ -0,0 +1,524 @@

+{
+  "Average": {
+    "tensorizer": {
+      "StaticProfiler::AverageFractalPeUtilization": 99.34062957763672,
+      "StaticProfiler::AveragePartitionUtilization": 98.38597106933594,
+      "StaticProfiler::AveragePeUtilization": 97.22911071777344,
+      "StaticProfiler::LocalizationEfficiency": 114.75756072998047,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 114.9507064819336,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 0
+    }
+  },
+  "Count": {
+    "tensorizer": {
+      "StaticProfiler::AverageFractalPeUtilization": 1,
+      "StaticProfiler::AveragePartitionUtilization": 1,
+      "StaticProfiler::AveragePeUtilization": 1,
+      "StaticProfiler::LocalizationEfficiency": 1,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 1,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 1
+    }
+  },
+  "Sum": {
+    "compiletime": {
+      "AGOrderingAnalysisPass": 2.1720478534698486,
+      "AffinePredicateResolution": 0.05626630783081055,
+      "AliasDependencyElimination": 0.0026874542236328125,
+      "AliasDependencyInduction": 0.5170383453369141,
+      "AliasDependencyReset": 2.2118747234344482,
+      "BFComputeCutting": 0.12706279754638672,
+      "BirCodeGenLoop": 2.4660451412200928,
+      "CCOpFusion": 0.8360562324523926,
+      "CanonicalizeConv": 1.700000029813964e-05,
+      "CanonicalizeDAGForPGTiling": 0.2245333194732666,
+      "CanonicalizeForTensorizer": 0.0003549999964889139,
+      "CanonicalizeIR": 0.07496881484985352,
+      "Canonicalizer": 0.00687999976798892,
+      "CoalesceCCOp": 0.20974230766296387,
+      "CommuteConcat": 0.03844571113586426,
+      "DMALocalityOpt": 0.0490877628326416,
+      "DMAProfiler": 0.09942150115966797,
+      "DMATilingProfiler": 0.09033346176147461,
+      "DataLocalityOpt": 2.409433364868164,
+      "DataStreaming": 0.16509604454040527,
+      "DeConcat": 0.035332441329956055,
+      "DeadCodeElimination": 0.0381770133972168,
+      "DeadStoreElimination": 1.5507030487060547,
+      "DelinearIndices": 0.39624762535095215,
+      "Delinearization": 0.15817999839782715,
+      "DoNothing": 7.104873657226563e-05,
+      "DramToDramTranspose": 0.09848809242248535,
+      "DumpGraphAndMetadata": 0.2141716480255127,
+      "EliminateDivs": 0.19814848899841309,
+      "ExpandBatchNorm": 0.0714106559753418,
+      "ExpandISAMacro": 0.10404038429260254,
+      "FactorizeBlkDims": 0.7872159481048584,
+      "FactorizeThreadAxesInFreeDims": 0.06115102767944336,
+      "FlattenMacroLoop": 0.10332107543945313,
+      "GenericAccessSimplifier": 0.03612351417541504,
+      "HoistCompute": 4.70000013592653e-05,
+      "IdentifyCrossPassTensors": 0.0004290000069886446,
+      "InferInitValue": 1.2103745937347412,
+      "InferIntrinsicOnCC": 0.49721264839172363,
+      "InferNeuronTensor": 2.2387959957122803,
+      "InferNonlocalTensors": 6.154020309448242,
+      "InferPSumTensor": 1.2426848411560059,
+      "InlineNativeKernels": 0.31959033012390137,
+      "InsertIOTransposes": 1.2181267738342285,
+      "InsertLocalTransposes": 1.086057424545288,
+      "InsertOffloadedTransposes": 0.1012120246887207,
+      "LICM": 0.1255204677581787,
+      "LateLegalizeInst": 0.19177460670471191,
+      "LateLegalizePostSplit": 0.10815072059631348,
+      "LateLowerReshapeOp": 0.045404911041259766,
+      "LateLowerTensorOp": 0.3547041416168213,
+      "LateNeuronInstComb": 0.4670724868774414,
+      "LayoutPreprocessing": 1.1901025772094727,
+      "LayoutPreprocessingAndAnalysis": 1.6328880786895752,
+      "LayoutRequirementAnalysis": 0.42856860160827637,
+      "LegalizeCCOpLayout": 0.08699345588684082,
+      "LegalizeOpLevelAlias": 0.03149080276489258,
+      "LegalizePartitionReduce": 0.09608721733093262,
+      "LegalizeSundaAccess": 1.5293858051300049,
+      "LegalizeSundaMacro": 0.44698476791381836,
+      "LegalizeType": 0.2130870819091797,
+      "LocalLayoutOpt": 0.8399438858032227,
+      "LoopFusion": 0.40386009216308594,
+      "LoopSplitting": 0.05149984359741211,
+      "LowerBroadcast": 0.11290383338928223,
+      "LowerCCOpBlockAxis": 0.2651100158691406,
+      "LowerComplexBroadcast": 0.1815800666809082,
+      "LowerIntrinsics": 1.2034423351287842,
+      "LowerTensorOp": 0.515345573425293,
+      "LowerTranspose": 0.5510139465332031,
+      "MacroGeneration": 3.3921492099761963,
+      "MaskPropagation": 0.14800381660461426,
+      "MemcastMotion": 0.0002300000051036477,
+      "MemcpyElimination": 5.45711612701416,
+      "MutateDataType": 0.04850482940673828,
+      "NeuronAliasDependencyInduction": 0.028447866439819336,
+      "NeuronAliasDependencyReset": 0.04381752014160156,
+      "NeuronInstComb": 0.20636940002441406,
+      "NeuronLICM": 0.3387613296508789,
+      "NeuronLoopFusion": 1.5814118385314941,
+      "NeuronLoopInterchange": 0.06079745292663574,
+      "NeuronSimplifier": 0.4541950225830078,
+      "NeuronSimplifyPredicates": 0.0973823070526123,
+      "NeuronValueNumbering": 0.11516690254211426,
+      "OptimizeAliasedCopyChain": 0.018416881561279297,
+      "OptimizeNKIKernels": 0.07892012596130371,
+      "PAGLayoutOpt": 7.157426357269287,
+      "PComputeCutting": 0.45456743240356445,
+      "PGLayoutTilingPipeline": 24.0252628326416,
+      "PGTiling": 6.715877532958984,
+      "PadElimination": 0.013921260833740234,
+      "ParAxesAnnotation": 6.056151390075684,
+      "PartialLoopFusion": 0.4644014835357666,
+      "PartialSimdFusion": 0.4906351566314697,
+      "PenguinizeFunctions": 0.00021300000662449747,
+      "PerfectLoopNest": 0.06508874893188477,
+      "PruneFunctions": 0.0007450000266544521,
+      "RecognizeOpIdiom": 0.2098982334136963,
+      "Recompute": 0.008437871932983398,
+      "RelaxPredicates": 0.1717524528503418,
+      "Rematerialization": 0.265545129776001,
+      "RemoveOptimizationBarriers": 0.0005959999980404973,
+      "ReshapeWeights": 0.021679162979125977,
+      "ResolveAccessConflict": 0.26529383659362793,
+      "ResolveComplicatePredicates": 0.057276248931884766,
+      "RewriteReplicationMatmul": 0.05362248420715332,
+      "RewriteWeights": 0.06288814544677734,
+      "SFKVectorizer": 7.441895961761475,
+      "ScatterMotion": 0.003945999778807163,
+      "SimpleAllReduceTiling": 0.0798797607421875,
+      "Simplifier": 0.12714624404907227,
+      "SimplifyMacroPredicates": 0.21231913566589355,
+      "SimplifyNeuronTensor": 0.36804652214050293,
+      "SimplifySlice": 0.03702497482299805,
+      "SimplifyTensor": 0.24286293983459473,
+      "SpillPSum": 0.6947588920593262,
+      "SplitAPUnionSets": 0.5079879760742188,
+      "SplitAccGrp": 0.05273175239562988,
+      "StaticProfiler": 0.1567850112915039,
+      "StaticTransposeLocalTensor": 0.46353960037231445,
+      "SundaISel": 1.5079319477081299,
+      "TCTransform": 0.04103660583496094,
+      "TensorInitialization": 0.17437958717346191,
+      "TensorOpSimplifier": 0.34393739700317383,
+      "TensorOpTransform": 1.1691737174987793,
+      "TensorizerLegalizationPass": 0.00018099999579135329,
+      "TileCCOps": 0.24624872207641602,
+      "TilingProfiler": 0.542656421661377,
+      "TransformConvOp": 0.13129019737243652,
+      "TritiumFusion": 1.9942443370819092,
+      "ValueNumbering": 0.11710119247436523,
+      "VectorizeDMA": 0.14786601066589355,
+      "VectorizeMatMult": 0.055516958236694336,
+      "VerifySupportedOps": 0.0003000000142492354,
+      "WeightCoalescing": 0.06569314002990723,
+      "ZeroSizeTensorElimination": 0.00036597251892089844,
+      "algsimp": 0.0020069999154657125,
+      "batchnorm_expander": 0.0007229999755509198,
+      "boundary-marker-removal": 0.0003640000068116933,
+      "call-inliner": 0.0002280000044265762,
+      "canonicalize-boundary-marker": 0.00044999999227002263,
+      "collective-stream-id-checker": 4.70000013592653e-05,
+      "comparison-expander": 0.0003969999961555004,
+      "computation-deduplicator": 0.00042600001324899495,
+      "config-lowering": 0.0001900000061141327,
+      "constant_folding": 0.000155999994603917,
+      "cse": 0.0004360000020824373,
+      "dce": 3.600000127335079e-05,
+      "dynamic-slice-transpose": 0.00014400000509340316,
+      "eliminate-redundant-compare": 0.0001429999974789098,
+      "emit-offloaded-dropout": 0.00024300000222865492,
+      "flatten-call-graph": 0.0002789999998640269,
+      "fuse-send-recv": 0.0013989999424666166,
+      "hilo-conditional-to-select": 8.099999831756577e-05,
+      "hilo::LegalizeAlias": 0.0032820000778883696,
+      "hilo::NeuronInstCombine": 0.0011530000483617187,
+      "hilo::NeuronOpFusion": 0.0002010000025620684,
+      "hilo::ReplaceTokenTypeWithU8Pass": 0.00039900001138448715,
+      "hilo::ScheduleFusion": 3.5000000934815034e-05,
+      "hilo::SixtyFourHack": 0.0005590000073425472,
+      "hilo::VerifyAliasing": 7.000000186963007e-05,
+      "hlo-mac-count": 0.0004199999966658652,
+      "io-con-pipe-begin": 4.999999873689376e-06,
+      "io-con-pipe-end": 9.999999974752427e-07,
+      "io-layout-normalization": 0.0008989999769255519,
+      "legalize-ccops-for-tensorizer": 1.5999999959603883e-05,
+      "legalize-compare": 0.0003650000144261867,
+      "lower-argminmax-custom-call": 0.00015700000221841037,
+      "map-inline": 0.0006140000186860561,
+      "metadata-naming": 0.0009309999877586961,
+      "mlir::detail::OpToOpPassAdaptor": 0.00030700000934302807,
+      "mlir::hlo::MhloToPyPenguin": 0.02938299998641014,
+      "mlir::mhlo::LowerComplexExtraPass": 0.0031610000878572464,
+      "mlir::mhlo::LowerComplexPass": 0.0037410000804811716,
+      "native-to-custom-softmax": 0.00034500000765547156,
+      "native-to-custom-softmax-dx": 0.0004039999912492931,
+      "neuron-hlo-verifier": 0.017588000744581223,
+      "operand_upcaster": 0.0006549999816343188,
+      "post-par-pipe-begin": 9.999999974752427e-07,
+      "post-par-pipe-end": 0.0,
+      "post-partition-simplification": 0.061535999178886414,
+      "pre-hlo-begin": 3.999999989900971e-06,
+      "pre-hlo-end": 9.999999974752427e-07,
+      "replace-minimum-constant": 0.0001880000054370612,
+      "reshape-mover": 7.000000186963007e-05,
+      "simplify-concat": 0.0017259999876841903,
+      "simplify-while-loops": 4.8999998398358e-05,
+      "transform-variadic-reduce": 0.0006210000137798488,
+      "tuple-simplifier": 0.00017600000137463212,
+      "unpack-nested-aws-ntwsr": 0.00033000000985339284,
+      "unroll-while-loop": 9.000000318337698e-06
+    },
+    "hilo": {
+      "HloMacCount": 16344449024.0,
+      "Traffic": 8801719296.0
+    },
+    "tensorizer": {
+      "DMATilingProfiler::TotalInstructionsAfterTiling": 314028,
+      "StaticProfiler::AifUb": 16.346195220947266,
+      "StaticProfiler::ArithmeticIntensityTensorizer": 18.758495330810547,
+      "StaticProfiler::AverageDmaLength": 5336.5810546875,
+      "StaticProfiler::DDRTransferBytes": 8194615604,
+      "StaticProfiler::InternalTransferBytes": 976258560,
+      "StaticProfiler::LoadExpanded": 1396391,
+      "StaticProfiler::StoreExpanded": 79617,
+      "StaticProfiler::TotalDMAExpanded": 1476008,
+      "StaticProfiler::TotalDynamicInstancesCount": 327331,
+      "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 320559,
+      "StaticProfiler::TotalLNCComm": 0,
+      "StaticProfiler::TotalLNCCommTransfer": 0,
+      "TilingProfiler::BatchnormInstructionsAfterTiling": 0,
+      "TilingProfiler::DmaInstructionsAfterTiling": 0,
+      "TilingProfiler::GenericInstructionsAfterTiling": 352,
+      "TilingProfiler::MatMultInstructionsAfterTiling": 250560,
+      "TilingProfiler::NumPfTransposes": 366,
+      "TilingProfiler::NumPfTransposesForIo": 39,
+      "TilingProfiler::NumPfTransposesForLocal": 182,
+      "TilingProfiler::NumPfTransposesForNonlocal": 145,
+      "TilingProfiler::PfTransposeInstructions": 34596,
+      "TilingProfiler::PfTransposeInstructionsForIo": 28280,
+      "TilingProfiler::PfTransposeInstructionsForLocal": 1668,
+      "TilingProfiler::PfTransposeInstructionsForNonlocal": 4648,
+      "TilingProfiler::ReduceInstructionsAfterTiling": 720,
+      "TilingProfiler::SimdInstructionsAfterTiling": 10459,
+      "TilingProfiler::TotalInstructionsAfterTiling": 0,
+      "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0,
+      "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0,
+      "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0,
+      "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0,
+      "TransformConvOp::conv2d_column_packing": 0,
+      "TransformConvOp::conv2d_column_packing_1": 0,
+      "TransformConvOp::conv2d_column_packing_io10": 0,
+      "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0
+    }
+  },
+  "all": {
+    "compiletime": {
+      "CanonicalizeConv": 1.700000029813964e-05,
+      "CanonicalizeForTensorizer": 0.0003549999964889139,
+      "Canonicalizer": 0.00687999976798892,
+      "HoistCompute": 4.70000013592653e-05,
+      "IdentifyCrossPassTensors": 0.0004290000069886446,
+      "MemcastMotion": 0.0002300000051036477,
+      "PenguinizeFunctions": 0.00021300000662449747,
+      "PruneFunctions": 0.0007450000266544521,
+      "RemoveOptimizationBarriers": 0.0005959999980404973,
+      "ScatterMotion": 0.003945999778807163,
+      "TensorizerLegalizationPass": 0.00018099999579135329,
+      "VerifySupportedOps": 0.0003000000142492354,
+      "algsimp": 0.0020069999154657125,
+      "batchnorm_expander": 0.0007229999755509198,
+      "boundary-marker-removal": 0.0003640000068116933,
+      "call-inliner": 0.0002280000044265762,
+      "canonicalize-boundary-marker": 0.00044999999227002263,
+      "collective-stream-id-checker": 4.70000013592653e-05,
+      "comparison-expander": 0.0003969999961555004,
+      "computation-deduplicator": 0.00042600001324899495,
+      "config-lowering": 0.0001900000061141327,
+      "constant_folding": 0.000155999994603917,
+      "cse": 0.0004360000020824373,
+      "dce": 3.600000127335079e-05,
+      "dynamic-slice-transpose": 0.00014400000509340316,
+      "eliminate-redundant-compare": 0.0001429999974789098,
+      "emit-offloaded-dropout": 0.00024300000222865492,
+      "flatten-call-graph": 0.0002789999998640269,
+      "fuse-send-recv": 0.0013989999424666166,
+      "hilo-conditional-to-select": 8.099999831756577e-05,
+      "hilo::LegalizeAlias": 0.0032820000778883696,
+      "hilo::NeuronInstCombine": 0.0011530000483617187,
+      "hilo::NeuronOpFusion": 0.0002010000025620684,
+      "hilo::ReplaceTokenTypeWithU8Pass": 0.00039900001138448715,
+      "hilo::ScheduleFusion": 3.5000000934815034e-05,
+      "hilo::SixtyFourHack": 0.0005590000073425472,
+      "hilo::VerifyAliasing": 7.000000186963007e-05,
+      "hlo-mac-count": 0.0004199999966658652,
+      "io-con-pipe-begin": 4.999999873689376e-06,
+      "io-con-pipe-end": 9.999999974752427e-07,
+      "io-layout-normalization": 0.0008989999769255519,
+      "legalize-ccops-for-tensorizer": 1.5999999959603883e-05,
+      "legalize-compare": 0.0003650000144261867,
+      "lower-argminmax-custom-call": 0.00015700000221841037,
+      "map-inline": 0.0006140000186860561,
+      "metadata-naming": 0.0009309999877586961,
+      "mlir::detail::OpToOpPassAdaptor": 0.00030700000934302807,
+      "mlir::hlo::MhloToPyPenguin": 0.02938299998641014,
+      "mlir::mhlo::LowerComplexExtraPass": 0.0031610000878572464,
+      "mlir::mhlo::LowerComplexPass": 0.0037410000804811716,
+      "native-to-custom-softmax": 0.00034500000765547156,
+      "native-to-custom-softmax-dx": 0.0004039999912492931,
+      "neuron-hlo-verifier": 0.017588000744581223,
+      "operand_upcaster": 0.0006549999816343188,
+      "post-par-pipe-begin": 9.999999974752427e-07,
+      "post-par-pipe-end": 0.0,
+      "post-partition-simplification": 0.061535999178886414,
+      "pre-hlo-begin": 3.999999989900971e-06,
+      "pre-hlo-end": 9.999999974752427e-07,
+      "replace-minimum-constant": 0.0001880000054370612,
+      "reshape-mover": 7.000000186963007e-05,
+      "simplify-concat": 0.0017259999876841903,
+      "simplify-while-loops": 4.8999998398358e-05,
+      "transform-variadic-reduce": 0.0006210000137798488,
+      "tuple-simplifier": 0.00017600000137463212,
+      "unpack-nested-aws-ntwsr": 0.00033000000985339284,
+      "unroll-while-loop": 9.000000318337698e-06
+    }
+  },
+  "sg00": {
+    "hilo": {
+      "ArithmeticIntensity": 3.7139217853546143,
+      "HloMacCount": 16344449024.0,
+      "Traffic": 8801719296.0
+    }
+  },
+  "sg0000": {
+    "compiletime": {
+      "AGOrderingAnalysisPass": 2.1720478534698486,
+      "AffinePredicateResolution": 0.05626630783081055,
+      "AliasDependencyElimination": 0.0026874542236328125,
+      "AliasDependencyInduction": 0.5170383453369141,
+      "AliasDependencyReset": 2.2118747234344482,
+      "BFComputeCutting": 0.12706279754638672,
+      "BirCodeGenLoop": 2.4660451412200928,
+      "CCOpFusion": 0.8360562324523926,
+      "CanonicalizeDAGForPGTiling": 0.2245333194732666,
+      "CanonicalizeIR": 0.07496881484985352,
+      "CoalesceCCOp": 0.20974230766296387,
+      "CommuteConcat": 0.03844571113586426,
+      "DMALocalityOpt": 0.0490877628326416,
+      "DMAProfiler": 0.09942150115966797,
+      "DMATilingProfiler": 0.09033346176147461,
+      "DataLocalityOpt": 2.409433364868164,
+      "DataStreaming": 0.16509604454040527,
+      "DeConcat": 0.035332441329956055,
+      "DeadCodeElimination": 0.0381770133972168,
+      "DeadStoreElimination": 1.5507030487060547,
+      "DelinearIndices": 0.39624762535095215,
+      "Delinearization": 0.15817999839782715,
+      "DoNothing": 7.104873657226563e-05,
+      "DramToDramTranspose": 0.09848809242248535,
+      "DumpGraphAndMetadata": 0.2141716480255127,
+      "EliminateDivs": 0.19814848899841309,
+      "ExpandBatchNorm": 0.0714106559753418,
+      "ExpandISAMacro": 0.10404038429260254,
+      "FactorizeBlkDims": 0.786508321762085,
+      "FactorizeThreadAxesInFreeDims": 0.06115102767944336,
+      "FlattenMacroLoop": 0.10332107543945313,
+      "GenericAccessSimplifier": 0.03612351417541504,
+      "InferInitValue": 1.2103745937347412,
+      "InferIntrinsicOnCC": 0.49721264839172363,
+      "InferNeuronTensor": 2.2387959957122803,
+      "InferNonlocalTensors": 6.154020309448242,
+      "InferPSumTensor": 1.2426848411560059,
+      "InlineNativeKernels": 0.31959033012390137,
+      "InsertIOTransposes": 1.2181267738342285,
+      "InsertLocalTransposes": 1.086057424545288,
+      "InsertOffloadedTransposes": 0.1012120246887207,
+      "LICM": 0.1255204677581787,
+      "LateLegalizeInst": 0.19177460670471191,
+      "LateLegalizePostSplit": 0.10815072059631348,
+      "LateLowerReshapeOp": 0.045404911041259766,
+      "LateLowerTensorOp": 0.3547041416168213,
+      "LateNeuronInstComb": 0.4665071964263916,
+      "LayoutPreprocessing": 1.1901025772094727,
+      "LayoutPreprocessingAndAnalysis": 1.6328880786895752,
+      "LayoutRequirementAnalysis": 0.42856860160827637,
+      "LegalizeCCOpLayout": 0.08699345588684082,
+      "LegalizeOpLevelAlias": 0.03149080276489258,
+      "LegalizePartitionReduce": 0.09608721733093262,
+      "LegalizeSundaAccess": 1.5293858051300049,
+      "LegalizeSundaMacro": 0.44698476791381836,
+      "LegalizeType": 0.2130870819091797,
+      "LocalLayoutOpt": 0.8399438858032227,
+      "LoopFusion": 0.40386009216308594,
+      "LoopSplitting": 0.05149984359741211,
+      "LowerBroadcast": 0.11268091201782227,
+      "LowerCCOpBlockAxis": 0.2651100158691406,
+      "LowerComplexBroadcast": 0.1815800666809082,
+      "LowerIntrinsics": 1.2032275199890137,
+      "LowerTensorOp": 0.515345573425293,
+      "LowerTranspose": 0.5507981777191162,
+      "MacroGeneration": 3.3921492099761963,
+      "MaskPropagation": 0.14800381660461426,
+      "MemcpyElimination": 5.45711612701416,
+      "MutateDataType": 0.04850482940673828,
+      "NeuronAliasDependencyInduction": 0.028447866439819336,
+      "NeuronAliasDependencyReset": 0.04381752014160156,
+      "NeuronInstComb": 0.20571517944335938,
+      "NeuronLICM": 0.3387613296508789,
+      "NeuronLoopFusion": 1.5814118385314941,
+      "NeuronLoopInterchange": 0.06079745292663574,
+      "NeuronSimplifier": 0.4541950225830078,
+      "NeuronSimplifyPredicates": 0.0973823070526123,
+      "NeuronValueNumbering": 0.11469674110412598,
+      "OptimizeAliasedCopyChain": 0.018416881561279297,
+      "OptimizeNKIKernels": 0.07892012596130371,
+      "PAGLayoutOpt": 7.157426357269287,
+      "PComputeCutting": 0.45456743240356445,
+      "PGLayoutTilingPipeline": 24.0252628326416,
+      "PGTiling": 6.715877532958984,
+      "PadElimination": 0.013921260833740234,
+      "ParAxesAnnotation": 6.056151390075684,
+      "PartialLoopFusion": 0.4644014835357666,
+      "PartialSimdFusion": 0.4906351566314697,
+      "PerfectLoopNest": 0.06508874893188477,
+      "RecognizeOpIdiom": 0.2098982334136963,
+      "Recompute": 0.008437871932983398,
+      "RelaxPredicates": 0.1717524528503418,
+      "Rematerialization": 0.265545129776001,
+      "ReshapeWeights": 0.021679162979125977,
+      "ResolveAccessConflict": 0.26529383659362793,
+      "ResolveComplicatePredicates": 0.057276248931884766,
+      "RewriteReplicationMatmul": 0.05362248420715332,
+      "RewriteWeights": 0.06288814544677734,
+      "SFKVectorizer": 7.441895961761475,
+      "SimpleAllReduceTiling": 0.0798797607421875,
+      "Simplifier": 0.12714624404907227,
+      "SimplifyMacroPredicates": 0.21231913566589355,
+      "SimplifyNeuronTensor": 0.36804652214050293,
+      "SimplifySlice": 0.03702497482299805,
+      "SimplifyTensor": 0.24286293983459473,
+      "SpillPSum": 0.6902801990509033,
+      "SplitAPUnionSets": 0.5079879760742188,
+      "SplitAccGrp": 0.05273175239562988,
+      "StaticProfiler": 0.1567850112915039,
+      "StaticTransposeLocalTensor": 0.46353960037231445,
+      "SundaISel": 1.5079319477081299,
+      "TCTransform": 0.04103660583496094,
+      "TensorInitialization": 0.17437958717346191,
+      "TensorOpSimplifier": 0.34393739700317383,
+      "TensorOpTransform": 1.1691737174987793,
+      "TileCCOps": 0.24624872207641602,
+      "TilingProfiler": 0.542656421661377,
+      "TransformConvOp": 0.13129019737243652,
+      "TritiumFusion": 1.9942443370819092,
+      "ValueNumbering": 0.11710119247436523,
+      "VectorizeDMA": 0.14786601066589355,
+      "VectorizeMatMult": 0.055516958236694336,
+      "WeightCoalescing": 0.06569314002990723,
+      "ZeroSizeTensorElimination": 0.00036597251892089844
+    },
+    "tensorizer": {
+      "DMATilingProfiler::TotalInstructionsAfterTiling": 314028,
+      "StaticProfiler::AifUb": 16.346195220947266,
+      "StaticProfiler::ArithmeticIntensityTensorizer": 18.758495330810547,
+      "StaticProfiler::AverageDmaLength": 5336.5810546875,
+      "StaticProfiler::AverageFractalPeUtilization": 99.34062957763672,
+      "StaticProfiler::AveragePartitionUtilization": 98.38597106933594,
+      "StaticProfiler::AveragePeUtilization": 97.22911071777344,
+      "StaticProfiler::DDRTransferBytes": 8194615604,
+      "StaticProfiler::InternalTransferBytes": 976258560,
+      "StaticProfiler::LoadExpanded": 1396391,
+      "StaticProfiler::LocalizationEfficiency": 114.75756072998047,
+      "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 114.9507064819336,
+      "StaticProfiler::StoreExpanded": 79617,
+      "StaticProfiler::TotalDMAExpanded": 1476008,
+      "StaticProfiler::TotalDynamicInstancesCount": 327331,
+      "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 320559,
+      "StaticProfiler::TotalLNCComm": 0,
+      "StaticProfiler::TotalLNCCommTransfer": 0,
+      "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0,
+      "TilingProfiler::AveragePeUtilizationAfterTiling": 0,
+      "TilingProfiler::BatchnormInstructionsAfterTiling": 0,
+      "TilingProfiler::DmaInstructionsAfterTiling": 0,
+      "TilingProfiler::GenericInstructionsAfterTiling": 352,
+      "TilingProfiler::MatMultInstructionsAfterTiling": 250560,
+      "TilingProfiler::NumPfTransposes": 366,
+      "TilingProfiler::NumPfTransposesForIo": 39,
+      "TilingProfiler::NumPfTransposesForLocal": 182,
+      "TilingProfiler::NumPfTransposesForNonlocal": 145,
+      "TilingProfiler::PfTransposeInstructions": 34596,
+      "TilingProfiler::PfTransposeInstructionsForIo": 28280,
+      "TilingProfiler::PfTransposeInstructionsForLocal": 1668,
+      "TilingProfiler::PfTransposeInstructionsForNonlocal": 4648,
+      "TilingProfiler::ReduceInstructionsAfterTiling": 720,
+      "TilingProfiler::SimdInstructionsAfterTiling": 10459,
+      "TilingProfiler::TotalInstructionsAfterTiling": 0,
+      "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0,
+      "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0,
+      "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0,
+      "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0,
+      "TransformConvOp::conv2d_column_packing": 0,
+      "TransformConvOp::conv2d_column_packing_1": 0,
+      "TransformConvOp::conv2d_column_packing_io10": 0,
+      "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0
+    }
+  },
+  "tiled_dve_transpose_10tiled_dve_transpose_10_sg0000": {
+    "compiletime": {
+      "FactorizeBlkDims": 0.0007076263427734375,
+      "LateNeuronInstComb": 0.0005652904510498047,
+      "LowerBroadcast": 0.00022292137145996094,
+      "LowerIntrinsics": 0.0002148151397705078,
+      "LowerTranspose": 0.00021576881408691406,
+      "NeuronInstComb": 0.0006542205810546875,
+      "NeuronValueNumbering": 0.00047016143798828125,
+      "SpillPSum": 0.0044786930084228516
+    }
+  }
+}

token_generation_model/_tp0_bk0/graph.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8207c614c7812232bfe4e0f280b5ab5a81bd24487594cbd5adcfb071e41473e6
+size 10415104

token_generation_model/_tp0_bk0/log-neuron-cc.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

token_generation_model/_tp0_bk0/metaneff.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19c84ba107b6d20879fc4c203176a3181065b3d48b68ae67a0b2e7bae597866c
+size 928218

token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69db8e5767ece577c8c0a9b48e73695bb6c31927dac9bf48d0f2ecdf5265ec9a
+size 904963

token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8207c614c7812232bfe4e0f280b5ab5a81bd24487594cbd5adcfb071e41473e6
+size 10415104

token_generation_model/_tp0_bk0/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1328f1067680c65a20e2976176e1438ecab6cb49d33d216e7cdd47d6b141f2f4
+size 10590211

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9417dfa2470f086897a0fa5acf4c11e1b05646717bdd7f9d4dc119332c65d421
+size 11422919

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,247 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if message.content is string %}\n        {%- set content = message.content %}\n    {%- else %}\n        {%- set content = '' %}\n    {%- endif %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is string %}\n            {%- set reasoning_content = message.reasoning_content %}\n 
       {%- else %}\n            {%- if '</think>' in content %}\n                {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n                {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or 
(messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "max_length": 512,
+  "model_max_length": 131072,
+  "pad_to_multiple_of": null,
+  "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "left",
+  "split_special_tokens": false,
+  "stride": 0,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff