Upload folder using huggingface_hub
Browse files- .gitattributes +8 -0
- README.md +176 -0
- WEIGHTS_README.md +36 -0
- added_tokens.json +28 -0
- config.json +69 -0
- context_encoding_model/_tp0_bk0/command.txt +1 -0
- context_encoding_model/_tp0_bk0/compile_flags.MODULE_e80578c547275f02c0fa+ed72d204.json +1 -0
- context_encoding_model/_tp0_bk0/global_metric_store.json +1051 -0
- context_encoding_model/_tp0_bk0/graph.neff +3 -0
- context_encoding_model/_tp0_bk0/log-neuron-cc.txt +0 -0
- context_encoding_model/_tp0_bk0/metaneff.pb +3 -0
- context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.hlo_module.pb +3 -0
- context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.neff +3 -0
- layout_opt/command.txt +1 -0
- layout_opt/graph.neff +3 -0
- layout_opt/log-neuron-cc.txt +0 -0
- layout_opt/metaneff +982 -0
- layout_opt/model/graph.hlo +3 -0
- merges.txt +0 -0
- model.pt +3 -0
- neuron_config.json +43 -0
- special_tokens_map.json +31 -0
- token_generation_model/_tp0_bk0/command.txt +1 -0
- token_generation_model/_tp0_bk0/compile_flags.MODULE_8f245c7816a398e13e79+a9d440f5.json +1 -0
- token_generation_model/_tp0_bk0/global_metric_store.json +524 -0
- token_generation_model/_tp0_bk0/graph.neff +3 -0
- token_generation_model/_tp0_bk0/log-neuron-cc.txt +0 -0
- token_generation_model/_tp0_bk0/metaneff.pb +3 -0
- token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.hlo_module.pb +3 -0
- token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.neff +3 -0
- token_generation_model/_tp0_bk0/wrapped_neff.hlo +3 -0
- tokenizer.json +3 -0
- tokenizer_config.json +247 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
context_encoding_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.neff filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
layout_opt/graph.neff filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
layout_opt/model/graph.hlo filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
token_generation_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.neff filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
token_generation_model/_tp0_bk0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: apache-2.0
|
| 5 |
+
pipeline_tag: text-generation
|
| 6 |
+
tags:
|
| 7 |
+
- chess
|
| 8 |
+
- neuron
|
| 9 |
+
- aws-trainium
|
| 10 |
+
- vllm
|
| 11 |
+
- optimum-neuron
|
| 12 |
+
- continuous-batching
|
| 13 |
+
base_model: karanps/ChessLM_Qwen3
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
# ChessLM Qwen3 - Neuron Traced (AWS Format Structure)
|
| 17 |
+
|
| 18 |
+
This is a Neuron-traced version of [karanps/ChessLM_Qwen3](https://huggingface.co/karanps/ChessLM_Qwen3) optimized for AWS Trainium (trn1) and Inferentia (inf2) instances using vLLM with **continuous batching enabled**.
|
| 19 |
+
|
| 20 |
+
This model follows the AWS Neuron repository structure with separate directories for compiled artifacts.
|
| 21 |
+
|
| 22 |
+
## Model Details
|
| 23 |
+
|
| 24 |
+
- **Base Model**: Qwen3-8B fine-tuned for chess
|
| 25 |
+
- **Compilation**: optimum-neuron[vllm]==0.3.0
|
| 26 |
+
- **Compiler Version**: neuronxcc 2.21.33363.0
|
| 27 |
+
- **Target Hardware**: AWS Trainium (trn1) / Inferentia (inf2)
|
| 28 |
+
- **Precision**: BF16
|
| 29 |
+
- **Tensor Parallelism**: 2 cores
|
| 30 |
+
- **Batch Size**: 4 (continuous batching enabled)
|
| 31 |
+
- **Max Sequence Length**: 2048
|
| 32 |
+
|
| 33 |
+
## Repository Structure
|
| 34 |
+
|
| 35 |
+
This repository follows the AWS Neuron format with organized directories:
|
| 36 |
+
|
| 37 |
+
```
|
| 38 |
+
├── context_encoding_model/
|
| 39 |
+
│ └── _tp0_bk0/
|
| 40 |
+
│ ├── graph.neff
|
| 41 |
+
│ ├── model.MODULE_*.neff
|
| 42 |
+
│ ├── model.MODULE_*.hlo_module.pb
|
| 43 |
+
│ ├── compile_flags.*.json
|
| 44 |
+
│ ├── global_metric_store.json
|
| 45 |
+
│ └── log-neuron-cc.txt
|
| 46 |
+
├── token_generation_model/
|
| 47 |
+
│ └── _tp0_bk0/
|
| 48 |
+
│ ├── graph.neff
|
| 49 |
+
│ ├── model.MODULE_*.neff
|
| 50 |
+
│ ├── model.MODULE_*.hlo_module.pb
|
| 51 |
+
│ ├── wrapped_neff.hlo
|
| 52 |
+
│ ├── compile_flags.*.json
|
| 53 |
+
│ ├── global_metric_store.json
|
| 54 |
+
│ └── log-neuron-cc.txt
|
| 55 |
+
├── layout_opt/
|
| 56 |
+
│ ├── graph.neff
|
| 57 |
+
│ ├── log-neuron-cc.txt
|
| 58 |
+
│ └── model/
|
| 59 |
+
│ └── graph.hlo
|
| 60 |
+
├── model.pt (17GB - contains compiled graphs + weights)
|
| 61 |
+
├── config.json
|
| 62 |
+
├── neuron_config.json
|
| 63 |
+
└── tokenizer files
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### Key Files
|
| 67 |
+
|
| 68 |
+
- **context_encoding_model/**: Compiled NEFF files for processing initial prompt sequences (up to 2048 tokens)
|
| 69 |
+
- **token_generation_model/**: Compiled NEFF files for autoregressive token generation
|
| 70 |
+
- **layout_opt/**: Layout optimization artifacts from compilation
|
| 71 |
+
- **model.pt**: Main model file containing compiled graphs and embedded weights (17GB)
|
| 72 |
+
- **neuron_config.json**: Neuron compilation configuration
|
| 73 |
+
|
| 74 |
+
## Difference from AWS Reference Format
|
| 75 |
+
|
| 76 |
+
The AWS Neuron reference models (e.g., `aws-neuron/Qwen3-1.7B-TP2-BS8-SEQ4096`) typically have:
|
| 77 |
+
- A `weights/` directory with separate safetensors files (e.g., `tp0_sharded_checkpoint.safetensors`)
|
| 78 |
+
- A smaller model.pt (e.g., ~100MB) containing just the model structure
|
| 79 |
+
|
| 80 |
+
**This model** has:
|
| 81 |
+
- Weights embedded within model.pt (17GB)
|
| 82 |
+
- No `weights/` directory in this repository (Git cannot store empty directories); see `WEIGHTS_README.md` for how this differs from the reference layout
|
| 83 |
+
|
| 84 |
+
This is because models compiled with optimum-neuron[vllm]==0.3.0 bundle the weights within the compiled artifact. During compilation the weights are optimized and stored in model.pt alongside the compiled NEFF (Neuron Executable File Format) graphs, rather than as separate safetensors shards. This is a valid alternative layout that provides the same functionality.
|
| 85 |
+
|
| 86 |
+
## Requirements
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
pip install optimum-neuron[vllm]==0.3.0
|
| 90 |
+
pip install neuronx-distributed --extra-index-url=https://pip.repos.neuron.amazonaws.com
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## Usage
|
| 94 |
+
|
| 95 |
+
### Loading the Model
|
| 96 |
+
|
| 97 |
+
```python
|
| 98 |
+
from optimum.neuron import NeuronModelForCausalLM
|
| 99 |
+
from transformers import AutoTokenizer
|
| 100 |
+
|
| 101 |
+
# Load the model
|
| 102 |
+
model = NeuronModelForCausalLM.from_pretrained("kunhunjon/ChessLM_Qwen3_Trainium_AWS_Format")
|
| 103 |
+
tokenizer = AutoTokenizer.from_pretrained("kunhunjon/ChessLM_Qwen3_Trainium_AWS_Format")
|
| 104 |
+
|
| 105 |
+
# Run inference
|
| 106 |
+
prompt = "e2e4"
|
| 107 |
+
inputs = tokenizer(prompt, return_tensors="pt")
|
| 108 |
+
outputs = model.generate(**inputs, max_new_tokens=20)
|
| 109 |
+
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 110 |
+
print(result)
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
### Hardware Requirements
|
| 114 |
+
|
| 115 |
+
- AWS Trainium (trn1.32xlarge, trn1.2xlarge) or Inferentia (inf2) instances
|
| 116 |
+
- At least 2 Neuron cores (as configured during tracing)
|
| 117 |
+
- Minimum 32GB RAM recommended
|
| 118 |
+
|
| 119 |
+
## Compilation Details
|
| 120 |
+
|
| 121 |
+
This model was traced with the following parameters:
|
| 122 |
+
- `batch_size=4`
|
| 123 |
+
- `sequence_length=2048`
|
| 124 |
+
- `num_cores=2`
|
| 125 |
+
- `auto_cast_type="bf16"`
|
| 126 |
+
- `continuous_batching=True`
|
| 127 |
+
|
| 128 |
+
### Compilation Artifacts
|
| 129 |
+
|
| 130 |
+
The separate directories contain all compilation artifacts:
|
| 131 |
+
- **NEFF files**: Neuron Executable File Format - the compiled compute graphs
|
| 132 |
+
- **HLO files**: High-Level Operations - intermediate representation
|
| 133 |
+
- **Compilation logs**: Detailed logs from neuronx-cc compiler
|
| 134 |
+
- **Metadata**: Configuration and metrics from compilation
|
| 135 |
+
|
| 136 |
+
### Continuous Batching
|
| 137 |
+
|
| 138 |
+
This model is compiled with **continuous batching enabled**, which allows vLLM to:
|
| 139 |
+
- Process multiple requests simultaneously with dynamic batch sizes up to 4
|
| 140 |
+
- Optimize throughput by batching requests with different sequence lengths
|
| 141 |
+
- Reduce latency for concurrent inference workloads
|
| 142 |
+
|
| 143 |
+
**Note**: On-device sampling is disabled due to a known Neuron runtime limitation when using tensor parallelism with 2 cores. Sampling is handled on the host instead.
|
| 144 |
+
|
| 145 |
+
## Compilation Metrics
|
| 146 |
+
|
| 147 |
+
- **Total compilation time**: ~8.1 minutes
|
| 148 |
+
- **Token generation model**: 219 seconds
|
| 149 |
+
- **Context encoding model**: 165 seconds
|
| 150 |
+
- **Compiler**: neuronxcc 2.21.33363.0
|
| 151 |
+
- **Model size**: 17GB (with embedded weights)
|
| 152 |
+
|
| 153 |
+
## Model Files
|
| 154 |
+
|
| 155 |
+
| File | Purpose |
|
| 156 |
+
|------|---------|
|
| 157 |
+
| model.pt | Main model with embedded weights (17GB) |
|
| 158 |
+
| config.json | Base model configuration |
|
| 159 |
+
| neuron_config.json | Neuron compilation settings |
|
| 160 |
+
| tokenizer* | Tokenizer files for text processing |
|
| 161 |
+
| context_encoding_model/ | Compiled graphs for prompt processing |
|
| 162 |
+
| token_generation_model/ | Compiled graphs for token generation |
|
| 163 |
+
| layout_opt/ | Weight layout optimization artifacts |
|
| 164 |
+
|
| 165 |
+
## License
|
| 166 |
+
|
| 167 |
+
This model inherits the license from the base model [karanps/ChessLM_Qwen3](https://huggingface.co/karanps/ChessLM_Qwen3).
|
| 168 |
+
|
| 169 |
+
## Citation
|
| 170 |
+
|
| 171 |
+
If you use this model, please cite the original ChessLM model and AWS Neuron tools.
|
| 172 |
+
|
| 173 |
+
## See Also
|
| 174 |
+
|
| 175 |
+
- **Sharded version**: [kunhunjon/ChessLM_Qwen3_Trainium_Sharded](https://huggingface.co/kunhunjon/ChessLM_Qwen3_Trainium_Sharded) - Model split into 9x2GB shards for easier downloading
|
| 176 |
+
- **Standard version**: [kunhunjon/ChessLM_Qwen3_Trainium](https://huggingface.co/kunhunjon/ChessLM_Qwen3_Trainium) - Single model.pt file
|
WEIGHTS_README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Weights Information
|
| 2 |
+
|
| 3 |
+
This model contains weights bundled within model.pt (17GB).
|
| 4 |
+
|
| 5 |
+
In the AWS Neuron reference format, weights are typically stored separately as:
|
| 6 |
+
- `weights/tp0_sharded_checkpoint.safetensors`
|
| 7 |
+
- `weights/tp1_sharded_checkpoint.safetensors`
|
| 8 |
+
|
| 9 |
+
To extract weights to safetensors format, you would need to:
|
| 10 |
+
1. Load the model using optimum-neuron
|
| 11 |
+
2. Extract the state_dict
|
| 12 |
+
3. Convert to safetensors format
|
| 13 |
+
4. Shard by tensor parallel rank
|
| 14 |
+
|
| 15 |
+
This is currently not straightforward for compiled Neuron models as the weights
|
| 16 |
+
are embedded in the compiled format.
|
| 17 |
+
|
| 18 |
+
## Current Structure
|
| 19 |
+
|
| 20 |
+
The model.pt file contains:
|
| 21 |
+
- Compiled graphs (NEFF format)
|
| 22 |
+
- Model weights (optimized for Neuron)
|
| 23 |
+
- Runtime metadata
|
| 24 |
+
|
| 25 |
+
The separate directories contain:
|
| 26 |
+
- `context_encoding_model/`: NEFF files for context encoding
|
| 27 |
+
- `token_generation_model/`: NEFF files for token generation
|
| 28 |
+
- `layout_opt/`: Layout optimization artifacts
|
| 29 |
+
|
| 30 |
+
## Usage
|
| 31 |
+
|
| 32 |
+
Load this model using:
|
| 33 |
+
```python
|
| 34 |
+
from optimum.neuron import NeuronModelForCausalLM
|
| 35 |
+
model = NeuronModelForCausalLM.from_pretrained("path/to/model")
|
| 36 |
+
```
|
added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
config.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 4096,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 12288,
|
| 14 |
+
"layer_types": [
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention"
|
| 51 |
+
],
|
| 52 |
+
"max_position_embeddings": 40960,
|
| 53 |
+
"max_window_layers": 36,
|
| 54 |
+
"model_type": "qwen3",
|
| 55 |
+
"num_attention_heads": 32,
|
| 56 |
+
"num_hidden_layers": 36,
|
| 57 |
+
"num_key_value_heads": 8,
|
| 58 |
+
"pad_token_id": 151643,
|
| 59 |
+
"rms_norm_eps": 1e-06,
|
| 60 |
+
"rope_scaling": null,
|
| 61 |
+
"rope_theta": 1000000,
|
| 62 |
+
"sliding_window": null,
|
| 63 |
+
"tie_word_embeddings": false,
|
| 64 |
+
"torch_dtype": "bfloat16",
|
| 65 |
+
"transformers_version": "4.51.3",
|
| 66 |
+
"use_cache": true,
|
| 67 |
+
"use_sliding_window": false,
|
| 68 |
+
"vocab_size": 151936
|
| 69 |
+
}
|
context_encoding_model/_tp0_bk0/command.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
neuronx-cc compile --framework=XLA model.MODULE_e80578c547275f02c0fa+ed72d204.hlo_module.pb --output model.MODULE_e80578c547275f02c0fa+ed72d204.neff --target=trn1 --auto-cast=none --model-type=transformer '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ' -O2 --lnc=1 --logfile=log-neuron-cc.txt --verbose=35
|
context_encoding_model/_tp0_bk0/compile_flags.MODULE_e80578c547275f02c0fa+ed72d204.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
context_encoding_model/_tp0_bk0/global_metric_store.json
ADDED
|
@@ -0,0 +1,1051 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Average": {
|
| 3 |
+
"tensorizer": {
|
| 4 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.99919128417969,
|
| 5 |
+
"StaticProfiler::AveragePartitionUtilization": 99.9390869140625,
|
| 6 |
+
"StaticProfiler::AveragePeUtilization": 99.99919128417969,
|
| 7 |
+
"StaticProfiler::LocalizationEfficiency": 52.21323013305664,
|
| 8 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 56.50444793701172,
|
| 9 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
| 10 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"Count": {
|
| 14 |
+
"tensorizer": {
|
| 15 |
+
"StaticProfiler::AverageFractalPeUtilization": 1.0,
|
| 16 |
+
"StaticProfiler::AveragePartitionUtilization": 1.0,
|
| 17 |
+
"StaticProfiler::AveragePeUtilization": 1.0,
|
| 18 |
+
"StaticProfiler::LocalizationEfficiency": 1.0,
|
| 19 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
|
| 20 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
|
| 21 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"Sum": {
|
| 25 |
+
"compiletime": {
|
| 26 |
+
"AGOrderingAnalysisPass": 0.07080268859863281,
|
| 27 |
+
"AffinePredicateResolution": 0.001844644546508789,
|
| 28 |
+
"AliasDependencyElimination": 0.0001308917999267578,
|
| 29 |
+
"AliasDependencyInduction": 0.012178182601928711,
|
| 30 |
+
"AliasDependencyReset": 0.027022123336791992,
|
| 31 |
+
"BFComputeCutting": 0.0061855316162109375,
|
| 32 |
+
"BirCodeGenLoop": 0.17315936088562012,
|
| 33 |
+
"CCOpFusion": 0.08119010925292969,
|
| 34 |
+
"CanonicalizeConv": 0.00011999999696854502,
|
| 35 |
+
"CanonicalizeDAGForPGTiling": 0.0066263675689697266,
|
| 36 |
+
"CanonicalizeForTensorizer": 4.400000034365803e-05,
|
| 37 |
+
"CanonicalizeIR": 0.0033893585205078125,
|
| 38 |
+
"Canonicalizer": 0.0010000000474974513,
|
| 39 |
+
"CoalesceCCOp": 0.0032279491424560547,
|
| 40 |
+
"CommuteConcat": 0.0016355514526367188,
|
| 41 |
+
"DMALocalityOpt": 0.0021250247955322266,
|
| 42 |
+
"DMAProfiler": 0.005837917327880859,
|
| 43 |
+
"DMATilingProfiler": 0.010099172592163086,
|
| 44 |
+
"DataLocalityOpt": 0.25125575065612793,
|
| 45 |
+
"DataStreaming": 0.010326147079467773,
|
| 46 |
+
"DeConcat": 0.002583742141723633,
|
| 47 |
+
"DeadCodeElimination": 0.0018777847290039063,
|
| 48 |
+
"DeadStoreElimination": 0.05162811279296875,
|
| 49 |
+
"DelinearIndices": 0.017117977142333984,
|
| 50 |
+
"Delinearization": 0.006100893020629883,
|
| 51 |
+
"DoNothing": 6.961822509765625e-05,
|
| 52 |
+
"DramToDramTranspose": 0.0047817230224609375,
|
| 53 |
+
"DumpGraphAndMetadata": 0.008226871490478516,
|
| 54 |
+
"EliminateDivs": 0.005579710006713867,
|
| 55 |
+
"ExpandBatchNorm": 0.0024263858795166016,
|
| 56 |
+
"ExpandISAMacro": 0.004798412322998047,
|
| 57 |
+
"FactorizeBlkDims": 0.059967756271362305,
|
| 58 |
+
"FactorizeThreadAxesInFreeDims": 0.003553628921508789,
|
| 59 |
+
"FlattenMacroLoop": 0.0051920413970947266,
|
| 60 |
+
"GenericAccessSimplifier": 0.0013842582702636719,
|
| 61 |
+
"HoistCompute": 1.2999999853491317e-05,
|
| 62 |
+
"IdentifyCrossPassTensors": 7.300000288523734e-05,
|
| 63 |
+
"InferInitValue": 0.07938385009765625,
|
| 64 |
+
"InferIntrinsicOnCC": 0.0170440673828125,
|
| 65 |
+
"InferNeuronTensor": 0.09969878196716309,
|
| 66 |
+
"InferNonlocalTensors": 0.11206626892089844,
|
| 67 |
+
"InferPSumTensor": 0.0999910831451416,
|
| 68 |
+
"InlineNativeKernels": 0.003079652786254883,
|
| 69 |
+
"InsertIOTransposes": 0.031575918197631836,
|
| 70 |
+
"InsertLocalTransposes": 0.01450800895690918,
|
| 71 |
+
"InsertOffloadedTransposes": 0.010621309280395508,
|
| 72 |
+
"LICM": 0.006058454513549805,
|
| 73 |
+
"LateLegalizeInst": 0.009308576583862305,
|
| 74 |
+
"LateLegalizePostSplit": 0.005577564239501953,
|
| 75 |
+
"LateLowerReshapeOp": 0.002005338668823242,
|
| 76 |
+
"LateLowerTensorOp": 0.006224155426025391,
|
| 77 |
+
"LateNeuronInstComb": 0.026279211044311523,
|
| 78 |
+
"LayoutPreprocessing": 0.045662879943847656,
|
| 79 |
+
"LayoutPreprocessingAndAnalysis": 0.08894896507263184,
|
| 80 |
+
"LayoutRequirementAnalysis": 0.014644384384155273,
|
| 81 |
+
"LegalizeCCOpLayout": 0.002945423126220703,
|
| 82 |
+
"LegalizeOpLevelAlias": 0.0015463829040527344,
|
| 83 |
+
"LegalizePartitionReduce": 0.0025038719177246094,
|
| 84 |
+
"LegalizeSundaAccess": 0.05164527893066406,
|
| 85 |
+
"LegalizeSundaMacro": 0.02343463897705078,
|
| 86 |
+
"LegalizeType": 0.007515668869018555,
|
| 87 |
+
"LocalLayoutOpt": 0.05471658706665039,
|
| 88 |
+
"LoopFusion": 0.009645700454711914,
|
| 89 |
+
"LoopSplitting": 0.0005736351013183594,
|
| 90 |
+
"LowerBroadcast": 0.0030760765075683594,
|
| 91 |
+
"LowerCCOpBlockAxis": 0.010100364685058594,
|
| 92 |
+
"LowerComplexBroadcast": 0.004259347915649414,
|
| 93 |
+
"LowerIntrinsics": 0.071380615234375,
|
| 94 |
+
"LowerTensorOp": 0.017409563064575195,
|
| 95 |
+
"LowerTranspose": 2.716614007949829,
|
| 96 |
+
"MacroGeneration": 0.15842843055725098,
|
| 97 |
+
"MaskPropagation": 0.004798412322998047,
|
| 98 |
+
"MemcastMotion": 3.300000025774352e-05,
|
| 99 |
+
"MemcpyElimination": 0.14069795608520508,
|
| 100 |
+
"MutateDataType": 0.0018687248229980469,
|
| 101 |
+
"NeuronAliasDependencyInduction": 0.0005295276641845703,
|
| 102 |
+
"NeuronAliasDependencyReset": 0.014295816421508789,
|
| 103 |
+
"NeuronInstComb": 0.014310121536254883,
|
| 104 |
+
"NeuronLICM": 0.01824188232421875,
|
| 105 |
+
"NeuronLoopFusion": 0.03763270378112793,
|
| 106 |
+
"NeuronLoopInterchange": 0.0033299922943115234,
|
| 107 |
+
"NeuronSimplifier": 0.02371072769165039,
|
| 108 |
+
"NeuronSimplifyPredicates": 0.010996103286743164,
|
| 109 |
+
"NeuronValueNumbering": 0.005862236022949219,
|
| 110 |
+
"OptimizeAliasedCopyChain": 0.0014340877532958984,
|
| 111 |
+
"OptimizeNKIKernels": 0.0033910274505615234,
|
| 112 |
+
"PAGLayoutOpt": 1.3955655097961426,
|
| 113 |
+
"PComputeCutting": 0.01343226432800293,
|
| 114 |
+
"PGLayoutTilingPipeline": 2.056190013885498,
|
| 115 |
+
"PGTiling": 0.29619383811950684,
|
| 116 |
+
"PadElimination": 0.000553131103515625,
|
| 117 |
+
"ParAxesAnnotation": 1.3579421043395996,
|
| 118 |
+
"PartialLoopFusion": 0.04784822463989258,
|
| 119 |
+
"PartialSimdFusion": 0.1002810001373291,
|
| 120 |
+
"PenguinizeFunctions": 4.199999966658652e-05,
|
| 121 |
+
"PerfectLoopNest": 0.0037620067596435547,
|
| 122 |
+
"PruneFunctions": 3.199999991920777e-05,
|
| 123 |
+
"RecognizeOpIdiom": 0.0069119930267333984,
|
| 124 |
+
"Recompute": 0.0003383159637451172,
|
| 125 |
+
"RelaxPredicates": 0.04035329818725586,
|
| 126 |
+
"Rematerialization": 0.003230571746826172,
|
| 127 |
+
"RemoveOptimizationBarriers": 7.599999662488699e-05,
|
| 128 |
+
"ReshapeWeights": 0.0011525154113769531,
|
| 129 |
+
"ResolveAccessConflict": 0.0065386295318603516,
|
| 130 |
+
"ResolveComplicatePredicates": 0.002877473831176758,
|
| 131 |
+
"RewriteReplicationMatmul": 0.0025200843811035156,
|
| 132 |
+
"RewriteWeights": 0.0038268566131591797,
|
| 133 |
+
"SFKVectorizer": 0.48886895179748535,
|
| 134 |
+
"ScatterMotion": 1.8999999156221747e-05,
|
| 135 |
+
"SimpleAllReduceTiling": 0.0031387805938720703,
|
| 136 |
+
"Simplifier": 0.004804134368896484,
|
| 137 |
+
"SimplifyMacroPredicates": 0.01790642738342285,
|
| 138 |
+
"SimplifyNeuronTensor": 0.020508527755737305,
|
| 139 |
+
"SimplifySlice": 0.00145721435546875,
|
| 140 |
+
"SimplifyTensor": 0.016368389129638672,
|
| 141 |
+
"SpillPSum": 0.049539804458618164,
|
| 142 |
+
"SplitAPUnionSets": 0.060128211975097656,
|
| 143 |
+
"SplitAccGrp": 0.0025734901428222656,
|
| 144 |
+
"StaticProfiler": 0.006608724594116211,
|
| 145 |
+
"StaticTransposeLocalTensor": 0.008615732192993164,
|
| 146 |
+
"SundaISel": 0.06819963455200195,
|
| 147 |
+
"TCTransform": 0.0016434192657470703,
|
| 148 |
+
"TensorInitialization": 0.013004541397094727,
|
| 149 |
+
"TensorOpSimplifier": 0.011576175689697266,
|
| 150 |
+
"TensorOpTransform": 0.04517507553100586,
|
| 151 |
+
"TensorizerLegalizationPass": 4.8000001697801054e-05,
|
| 152 |
+
"TileCCOps": 0.011648893356323242,
|
| 153 |
+
"TilingProfiler": 0.02406597137451172,
|
| 154 |
+
"TransformConvOp": 0.004629850387573242,
|
| 155 |
+
"TritiumFusion": 0.26013898849487305,
|
| 156 |
+
"ValueNumbering": 0.004456520080566406,
|
| 157 |
+
"VectorizeDMA": 0.009630918502807617,
|
| 158 |
+
"VectorizeMatMult": 0.046350955963134766,
|
| 159 |
+
"VerifySupportedOps": 4.3000000005122274e-05,
|
| 160 |
+
"WeightCoalescing": 0.0030286312103271484,
|
| 161 |
+
"ZeroSizeTensorElimination": 0.00014090538024902344,
|
| 162 |
+
"algsimp": 0.002338999882340431,
|
| 163 |
+
"batchnorm_expander": 4.099999932805076e-05,
|
| 164 |
+
"boundary-marker-removal": 1.4000000192027073e-05,
|
| 165 |
+
"call-inliner": 0.0003630000283010304,
|
| 166 |
+
"canonicalize-boundary-marker": 1.799999881768599e-05,
|
| 167 |
+
"collective-stream-id-checker": 6.0999998822808266e-05,
|
| 168 |
+
"comparison-expander": 0.0004900000058114529,
|
| 169 |
+
"computation-deduplicator": 5.7999997807201e-05,
|
| 170 |
+
"config-lowering": 8.399999933317304e-05,
|
| 171 |
+
"constant-statistics": 0.00037799999699927866,
|
| 172 |
+
"constant_folding": 0.00025699997786432505,
|
| 173 |
+
"cse": 5.8999998145736754e-05,
|
| 174 |
+
"dce": 6.299999949987978e-05,
|
| 175 |
+
"dot_decomposer": 0.00088900001719594,
|
| 176 |
+
"dynamic-slice-transpose": 1.2999999853491317e-05,
|
| 177 |
+
"eliminate-redundant-compare": 0.0002390000008745119,
|
| 178 |
+
"emit-offloaded-dropout": 4.199999966658652e-05,
|
| 179 |
+
"flatten-call-graph": 0.000893999997060746,
|
| 180 |
+
"fuse-send-recv": 6.500000017695129e-05,
|
| 181 |
+
"hilo-conditional-to-select": 1.5999999959603883e-05,
|
| 182 |
+
"hilo::LegalizeAlias": 1.3999999282532372e-05,
|
| 183 |
+
"hilo::NeuronInstCombine": 0.00014899999951012433,
|
| 184 |
+
"hilo::NeuronOpFusion": 3.099999958067201e-05,
|
| 185 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 4.8999998398358e-05,
|
| 186 |
+
"hilo::ScheduleFusion": 2.7000001864507794e-05,
|
| 187 |
+
"hilo::SixtyFourHack": 7.100000220816582e-05,
|
| 188 |
+
"hilo::VerifyAliasing": 1.1000000085914508e-05,
|
| 189 |
+
"hlo-mac-count": 0.0006209999555721879,
|
| 190 |
+
"instruction-histogram": 0.0005789999850094318,
|
| 191 |
+
"io-con-pipe-begin": 6.000000212225132e-06,
|
| 192 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
| 193 |
+
"io-layout-normalization": 0.0008500000112690032,
|
| 194 |
+
"io-statistics": 4.3000000005122274e-05,
|
| 195 |
+
"legalize-ccops-for-tensorizer": 3.999999989900971e-06,
|
| 196 |
+
"legalize-compare": 1.2000000424450263e-05,
|
| 197 |
+
"lower-argminmax-custom-call": 1.2999998943996616e-05,
|
| 198 |
+
"map-inline": 0.0007699999841861427,
|
| 199 |
+
"metadata-naming": 5.499999679159373e-05,
|
| 200 |
+
"mlir::detail::OpToOpPassAdaptor": 0.0006780000403523445,
|
| 201 |
+
"mlir::hlo::MhloToPyPenguin": 0.003496000077575445,
|
| 202 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.000299000006634742,
|
| 203 |
+
"mlir::mhlo::LowerComplexPass": 0.0007789999945089221,
|
| 204 |
+
"native-to-custom-softmax": 0.0005979999550618231,
|
| 205 |
+
"native-to-custom-softmax-dx": 0.0005189999938011169,
|
| 206 |
+
"neuron-hlo-verifier": 0.011839999817311764,
|
| 207 |
+
"operand_upcaster": 4.099999932805076e-05,
|
| 208 |
+
"opt-barrier-removal": 0.0004349999944679439,
|
| 209 |
+
"post-par-pipe-begin": 3.999999989900971e-06,
|
| 210 |
+
"post-par-pipe-end": 0.0,
|
| 211 |
+
"post-partition-simplification": 0.0016449999529868364,
|
| 212 |
+
"pre-par-pipe-begin": 1.9999999949504854e-06,
|
| 213 |
+
"pre-par-pipe-end": 0.0,
|
| 214 |
+
"pre-partition-simplification": 0.042899999767541885,
|
| 215 |
+
"replace-minimum-constant": 0.00044299999717622995,
|
| 216 |
+
"reshape-mover": 9.600000339560211e-05,
|
| 217 |
+
"simplify-concat": 0.0001939999929163605,
|
| 218 |
+
"simplify-while-loops": 7.500000356230885e-05,
|
| 219 |
+
"transform-variadic-reduce": 7.100000220816582e-05,
|
| 220 |
+
"tuple-simplifier": 0.00027200000477023423,
|
| 221 |
+
"unpack-nested-aws-ntwsr": 0.00031300002592615783,
|
| 222 |
+
"unroll-while-loop": 1.2999999853491317e-05,
|
| 223 |
+
"zero_sized_hlo_elimination": 0.0007820000173524022
|
| 224 |
+
},
|
| 225 |
+
"hilo": {
|
| 226 |
+
"ConstantSize": 1094.0,
|
| 227 |
+
"HloInputCount": 402.0,
|
| 228 |
+
"HloMacCount": 644556259328.0,
|
| 229 |
+
"HloOutputCount": 73.0,
|
| 230 |
+
"IfmapSize": 8795039744.0,
|
| 231 |
+
"OfmapSize": 604587520.0,
|
| 232 |
+
"OutputsReadFromCount": 0.0,
|
| 233 |
+
"PassthroughTensorsCount": 0.0,
|
| 234 |
+
"RedundantOutputCount": 0.0,
|
| 235 |
+
"Traffic": 2150728192.0
|
| 236 |
+
},
|
| 237 |
+
"tensorizer": {
|
| 238 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 95441.0,
|
| 239 |
+
"StaticProfiler::AifUb": 832.489990234375,
|
| 240 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 434.669921875,
|
| 241 |
+
"StaticProfiler::AverageDmaLength": 1623.94287109375,
|
| 242 |
+
"StaticProfiler::DDRTransferBytes": 1990484992.0,
|
| 243 |
+
"StaticProfiler::InternalTransferBytes": 1161470464.0,
|
| 244 |
+
"StaticProfiler::LoadExpanded": 1195909.0,
|
| 245 |
+
"StaticProfiler::StoreExpanded": 28288.0,
|
| 246 |
+
"StaticProfiler::TotalDMAExpanded": 1224197.0,
|
| 247 |
+
"StaticProfiler::TotalDynamicInstancesCount": 120578.0,
|
| 248 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 120578.0,
|
| 249 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
| 250 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
| 251 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
| 252 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
| 253 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 129.0,
|
| 254 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 63040.0,
|
| 255 |
+
"TilingProfiler::NumPfTransposes": 13.0,
|
| 256 |
+
"TilingProfiler::NumPfTransposesForIo": 3.0,
|
| 257 |
+
"TilingProfiler::NumPfTransposesForLocal": 8.0,
|
| 258 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
|
| 259 |
+
"TilingProfiler::PfTransposeInstructions": 25889.0,
|
| 260 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 19040.0,
|
| 261 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 5825.0,
|
| 262 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
|
| 263 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 8.0,
|
| 264 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 3225.0,
|
| 265 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
| 266 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
| 267 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
| 268 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
| 269 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
| 270 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
| 271 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
| 272 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
| 273 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
| 274 |
+
}
|
| 275 |
+
},
|
| 276 |
+
"all": {
|
| 277 |
+
"compiletime": {
|
| 278 |
+
"algsimp": 0.0021269998978823423,
|
| 279 |
+
"call-inliner": 0.0003319999959785491,
|
| 280 |
+
"collective-stream-id-checker": 5.2999999752501026e-05,
|
| 281 |
+
"comparison-expander": 0.00047400000039488077,
|
| 282 |
+
"constant-statistics": 0.00037799999699927866,
|
| 283 |
+
"constant_folding": 0.00022899999748915434,
|
| 284 |
+
"dce": 5.999999848427251e-05,
|
| 285 |
+
"dot_decomposer": 0.00088900001719594,
|
| 286 |
+
"eliminate-redundant-compare": 0.00022499999613501132,
|
| 287 |
+
"flatten-call-graph": 0.0008660000166855752,
|
| 288 |
+
"hlo-mac-count": 0.0005499999970197678,
|
| 289 |
+
"instruction-histogram": 0.0005789999850094318,
|
| 290 |
+
"io-con-pipe-begin": 6.000000212225132e-06,
|
| 291 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
| 292 |
+
"io-layout-normalization": 0.0008500000112690032,
|
| 293 |
+
"io-statistics": 4.3000000005122274e-05,
|
| 294 |
+
"map-inline": 0.0007329999934881926,
|
| 295 |
+
"native-to-custom-softmax": 0.0005789999850094318,
|
| 296 |
+
"native-to-custom-softmax-dx": 0.00042799999937415123,
|
| 297 |
+
"neuron-hlo-verifier": 0.010563000105321407,
|
| 298 |
+
"opt-barrier-removal": 0.0004349999944679439,
|
| 299 |
+
"pre-par-pipe-begin": 1.9999999949504854e-06,
|
| 300 |
+
"pre-par-pipe-end": 0.0,
|
| 301 |
+
"pre-partition-simplification": 0.042899999767541885,
|
| 302 |
+
"replace-minimum-constant": 0.0004189999890513718,
|
| 303 |
+
"reshape-mover": 8.499999967170879e-05,
|
| 304 |
+
"simplify-while-loops": 6.800000119255856e-05,
|
| 305 |
+
"tuple-simplifier": 0.00025499999173916876,
|
| 306 |
+
"unpack-nested-aws-ntwsr": 0.0003000000142492354,
|
| 307 |
+
"unroll-while-loop": 1.2999999853491317e-05,
|
| 308 |
+
"zero_sized_hlo_elimination": 0.0007820000173524022
|
| 309 |
+
}
|
| 310 |
+
},
|
| 311 |
+
"sg00": {
|
| 312 |
+
"compiletime": {
|
| 313 |
+
"CanonicalizeConv": 3.9999998989515007e-05,
|
| 314 |
+
"CanonicalizeForTensorizer": 1.4999999621068127e-05,
|
| 315 |
+
"Canonicalizer": 0.00037799999699927866,
|
| 316 |
+
"HoistCompute": 1.1000000085914508e-05,
|
| 317 |
+
"IdentifyCrossPassTensors": 4.999999873689376e-05,
|
| 318 |
+
"MemcastMotion": 2.700000004551839e-05,
|
| 319 |
+
"PenguinizeFunctions": 1.8999999156221747e-05,
|
| 320 |
+
"PruneFunctions": 1.4000000192027073e-05,
|
| 321 |
+
"RemoveOptimizationBarriers": 5.2999999752501026e-05,
|
| 322 |
+
"ScatterMotion": 7.999999979801942e-06,
|
| 323 |
+
"TensorizerLegalizationPass": 2.5999999706982635e-05,
|
| 324 |
+
"VerifySupportedOps": 1.4000000192027073e-05,
|
| 325 |
+
"algsimp": 6.600000051548705e-05,
|
| 326 |
+
"batchnorm_expander": 1.2000000424450263e-05,
|
| 327 |
+
"boundary-marker-removal": 3.999999989900971e-06,
|
| 328 |
+
"call-inliner": 9.000000318337698e-06,
|
| 329 |
+
"canonicalize-boundary-marker": 4.999999873689376e-06,
|
| 330 |
+
"collective-stream-id-checker": 1.9999999949504854e-06,
|
| 331 |
+
"comparison-expander": 3.999999989900971e-06,
|
| 332 |
+
"computation-deduplicator": 9.999999747378752e-06,
|
| 333 |
+
"config-lowering": 2.8000000384054147e-05,
|
| 334 |
+
"constant_folding": 9.000000318337698e-06,
|
| 335 |
+
"cse": 1.2000000424450263e-05,
|
| 336 |
+
"dce": 9.999999974752427e-07,
|
| 337 |
+
"dynamic-slice-transpose": 3.999999989900971e-06,
|
| 338 |
+
"eliminate-redundant-compare": 3.999999989900971e-06,
|
| 339 |
+
"emit-offloaded-dropout": 1.2999999853491317e-05,
|
| 340 |
+
"flatten-call-graph": 7.999999979801942e-06,
|
| 341 |
+
"fuse-send-recv": 1.8999999156221747e-05,
|
| 342 |
+
"hilo-conditional-to-select": 3.999999989900971e-06,
|
| 343 |
+
"hilo::LegalizeAlias": 4.999999873689376e-06,
|
| 344 |
+
"hilo::NeuronInstCombine": 6.299999949987978e-05,
|
| 345 |
+
"hilo::NeuronOpFusion": 1.2999999853491317e-05,
|
| 346 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 2.9000000722589903e-05,
|
| 347 |
+
"hilo::ScheduleFusion": 1.8000000636675395e-05,
|
| 348 |
+
"hilo::SixtyFourHack": 4.70000013592653e-05,
|
| 349 |
+
"hilo::VerifyAliasing": 7.000000096013537e-06,
|
| 350 |
+
"hlo-mac-count": 1.8999999156221747e-05,
|
| 351 |
+
"legalize-ccops-for-tensorizer": 1.9999999949504854e-06,
|
| 352 |
+
"legalize-compare": 3.000000106112566e-06,
|
| 353 |
+
"lower-argminmax-custom-call": 3.999999989900971e-06,
|
| 354 |
+
"map-inline": 9.999999747378752e-06,
|
| 355 |
+
"metadata-naming": 1.4999999621068127e-05,
|
| 356 |
+
"mlir::detail::OpToOpPassAdaptor": 5.2999999752501026e-05,
|
| 357 |
+
"mlir::hlo::MhloToPyPenguin": 0.0010730000212788582,
|
| 358 |
+
"mlir::mhlo::LowerComplexExtraPass": 9.000000136438757e-05,
|
| 359 |
+
"mlir::mhlo::LowerComplexPass": 0.0004689999914262444,
|
| 360 |
+
"native-to-custom-softmax": 4.999999873689376e-06,
|
| 361 |
+
"native-to-custom-softmax-dx": 6.199999916134402e-05,
|
| 362 |
+
"neuron-hlo-verifier": 0.00036100001307204366,
|
| 363 |
+
"operand_upcaster": 1.2000000424450263e-05,
|
| 364 |
+
"post-par-pipe-begin": 9.999999974752427e-07,
|
| 365 |
+
"post-par-pipe-end": 0.0,
|
| 366 |
+
"post-partition-simplification": 0.0005000000237487257,
|
| 367 |
+
"replace-minimum-constant": 7.000000096013537e-06,
|
| 368 |
+
"reshape-mover": 3.999999989900971e-06,
|
| 369 |
+
"simplify-concat": 4.8999998398358e-05,
|
| 370 |
+
"simplify-while-loops": 1.9999999949504854e-06,
|
| 371 |
+
"transform-variadic-reduce": 9.000000318337698e-06,
|
| 372 |
+
"tuple-simplifier": 4.999999873689376e-06,
|
| 373 |
+
"unpack-nested-aws-ntwsr": 3.000000106112566e-06,
|
| 374 |
+
"unroll-while-loop": 0.0
|
| 375 |
+
},
|
| 376 |
+
"hilo": {
|
| 377 |
+
"ArithmeticIntensity": 157.8584747314453,
|
| 378 |
+
"ConstantSize": 1094.0,
|
| 379 |
+
"HloInputCount": 402.0,
|
| 380 |
+
"HloMacCount": 60129542144.0,
|
| 381 |
+
"HloOutputCount": 73.0,
|
| 382 |
+
"IfmapSize": 8795039744.0,
|
| 383 |
+
"OfmapSize": 604587520.0,
|
| 384 |
+
"OutputsReadFromCount": 0.0,
|
| 385 |
+
"PassthroughTensorsCount": 0.0,
|
| 386 |
+
"RedundantOutputCount": 0.0,
|
| 387 |
+
"Traffic": 761815872.0
|
| 388 |
+
}
|
| 389 |
+
},
|
| 390 |
+
"sg0000": {
|
| 391 |
+
"compiletime": {
|
| 392 |
+
"AGOrderingAnalysisPass": 0.04314303398132324,
|
| 393 |
+
"AffinePredicateResolution": 0.0014219284057617188,
|
| 394 |
+
"AliasDependencyElimination": 0.00013756752014160156,
|
| 395 |
+
"AliasDependencyInduction": 0.009345054626464844,
|
| 396 |
+
"AliasDependencyReset": 0.023563146591186523,
|
| 397 |
+
"BFComputeCutting": 0.0037696361541748047,
|
| 398 |
+
"BirCodeGenLoop": 0.13709354400634766,
|
| 399 |
+
"CCOpFusion": 0.061038970947265625,
|
| 400 |
+
"CanonicalizeDAGForPGTiling": 0.0033833980560302734,
|
| 401 |
+
"CanonicalizeIR": 0.0029594898223876953,
|
| 402 |
+
"CoalesceCCOp": 0.0030164718627929688,
|
| 403 |
+
"CommuteConcat": 0.0010838508605957031,
|
| 404 |
+
"DMALocalityOpt": 0.002034425735473633,
|
| 405 |
+
"DMAProfiler": 0.005063295364379883,
|
| 406 |
+
"DMATilingProfiler": 0.0045011043548583984,
|
| 407 |
+
"DataLocalityOpt": 0.10246086120605469,
|
| 408 |
+
"DataStreaming": 0.008306264877319336,
|
| 409 |
+
"DeConcat": 0.001977682113647461,
|
| 410 |
+
"DeadCodeElimination": 0.0022068023681640625,
|
| 411 |
+
"DeadStoreElimination": 0.028951644897460938,
|
| 412 |
+
"DelinearIndices": 0.009624004364013672,
|
| 413 |
+
"Delinearization": 0.0037741661071777344,
|
| 414 |
+
"DoNothing": 7.152557373046875e-05,
|
| 415 |
+
"DramToDramTranspose": 0.002131938934326172,
|
| 416 |
+
"DumpGraphAndMetadata": 0.007627725601196289,
|
| 417 |
+
"EliminateDivs": 0.0052356719970703125,
|
| 418 |
+
"ExpandBatchNorm": 0.0017452239990234375,
|
| 419 |
+
"ExpandISAMacro": 0.003320455551147461,
|
| 420 |
+
"FactorizeBlkDims": 0.03702902793884277,
|
| 421 |
+
"FactorizeThreadAxesInFreeDims": 0.002062082290649414,
|
| 422 |
+
"FlattenMacroLoop": 0.003297090530395508,
|
| 423 |
+
"GenericAccessSimplifier": 0.0008604526519775391,
|
| 424 |
+
"InferInitValue": 0.038228511810302734,
|
| 425 |
+
"InferIntrinsicOnCC": 0.010860443115234375,
|
| 426 |
+
"InferNeuronTensor": 0.05763053894042969,
|
| 427 |
+
"InferNonlocalTensors": 0.2222437858581543,
|
| 428 |
+
"InferPSumTensor": 0.0829617977142334,
|
| 429 |
+
"InlineNativeKernels": 0.0021293163299560547,
|
| 430 |
+
"InsertIOTransposes": 0.016276836395263672,
|
| 431 |
+
"InsertLocalTransposes": 0.009328603744506836,
|
| 432 |
+
"InsertOffloadedTransposes": 0.009638786315917969,
|
| 433 |
+
"LICM": 0.003412008285522461,
|
| 434 |
+
"LateLegalizeInst": 0.009476900100708008,
|
| 435 |
+
"LateLegalizePostSplit": 0.00451970100402832,
|
| 436 |
+
"LateLowerReshapeOp": 0.001276254653930664,
|
| 437 |
+
"LateLowerTensorOp": 0.0059833526611328125,
|
| 438 |
+
"LateNeuronInstComb": 0.017744779586791992,
|
| 439 |
+
"LayoutPreprocessing": 0.03296685218811035,
|
| 440 |
+
"LayoutPreprocessingAndAnalysis": 0.1081993579864502,
|
| 441 |
+
"LayoutRequirementAnalysis": 0.00911259651184082,
|
| 442 |
+
"LegalizeCCOpLayout": 0.002354860305786133,
|
| 443 |
+
"LegalizeOpLevelAlias": 0.001096487045288086,
|
| 444 |
+
"LegalizePartitionReduce": 0.0018439292907714844,
|
| 445 |
+
"LegalizeSundaAccess": 0.04656553268432617,
|
| 446 |
+
"LegalizeSundaMacro": 0.011276006698608398,
|
| 447 |
+
"LegalizeType": 0.004993438720703125,
|
| 448 |
+
"LocalLayoutOpt": 0.03078746795654297,
|
| 449 |
+
"LoopFusion": 0.00582575798034668,
|
| 450 |
+
"LoopSplitting": 0.0003783702850341797,
|
| 451 |
+
"LowerBroadcast": 0.0020279884338378906,
|
| 452 |
+
"LowerCCOpBlockAxis": 0.006699562072753906,
|
| 453 |
+
"LowerComplexBroadcast": 0.002343893051147461,
|
| 454 |
+
"LowerIntrinsics": 0.050733089447021484,
|
| 455 |
+
"LowerTensorOp": 0.013502359390258789,
|
| 456 |
+
"LowerTranspose": 0.018018484115600586,
|
| 457 |
+
"MacroGeneration": 0.10082674026489258,
|
| 458 |
+
"MaskPropagation": 0.005489349365234375,
|
| 459 |
+
"MemcpyElimination": 0.12442874908447266,
|
| 460 |
+
"MutateDataType": 0.0011963844299316406,
|
| 461 |
+
"NeuronAliasDependencyInduction": 0.00037288665771484375,
|
| 462 |
+
"NeuronAliasDependencyReset": 0.012840986251831055,
|
| 463 |
+
"NeuronInstComb": 0.01097249984741211,
|
| 464 |
+
"NeuronLICM": 0.01376652717590332,
|
| 465 |
+
"NeuronLoopFusion": 0.017533540725708008,
|
| 466 |
+
"NeuronLoopInterchange": 0.002491474151611328,
|
| 467 |
+
"NeuronSimplifier": 0.013012886047363281,
|
| 468 |
+
"NeuronSimplifyPredicates": 0.027909040451049805,
|
| 469 |
+
"NeuronValueNumbering": 0.0057392120361328125,
|
| 470 |
+
"OptimizeAliasedCopyChain": 0.0006589889526367188,
|
| 471 |
+
"OptimizeNKIKernels": 0.0025641918182373047,
|
| 472 |
+
"PAGLayoutOpt": 0.24210047721862793,
|
| 473 |
+
"PComputeCutting": 0.008717536926269531,
|
| 474 |
+
"PGLayoutTilingPipeline": 0.9638533592224121,
|
| 475 |
+
"PGTiling": 0.20569705963134766,
|
| 476 |
+
"PadElimination": 0.0004475116729736328,
|
| 477 |
+
"ParAxesAnnotation": 0.22005105018615723,
|
| 478 |
+
"PartialLoopFusion": 0.047158002853393555,
|
| 479 |
+
"PartialSimdFusion": 0.07779932022094727,
|
| 480 |
+
"PerfectLoopNest": 0.0020694732666015625,
|
| 481 |
+
"RecognizeOpIdiom": 0.0045545101165771484,
|
| 482 |
+
"Recompute": 0.00025963783264160156,
|
| 483 |
+
"RelaxPredicates": 0.004879951477050781,
|
| 484 |
+
"Rematerialization": 0.005000591278076172,
|
| 485 |
+
"ReshapeWeights": 0.0006878376007080078,
|
| 486 |
+
"ResolveAccessConflict": 0.0038623809814453125,
|
| 487 |
+
"ResolveComplicatePredicates": 0.0014736652374267578,
|
| 488 |
+
"RewriteReplicationMatmul": 0.0016112327575683594,
|
| 489 |
+
"RewriteWeights": 0.002537965774536133,
|
| 490 |
+
"SFKVectorizer": 0.42055439949035645,
|
| 491 |
+
"SimpleAllReduceTiling": 0.002585887908935547,
|
| 492 |
+
"Simplifier": 0.0035560131072998047,
|
| 493 |
+
"SimplifyMacroPredicates": 0.02046680450439453,
|
| 494 |
+
"SimplifyNeuronTensor": 0.023018360137939453,
|
| 495 |
+
"SimplifySlice": 0.0010895729064941406,
|
| 496 |
+
"SimplifyTensor": 0.008915424346923828,
|
| 497 |
+
"SpillPSum": 0.032480716705322266,
|
| 498 |
+
"SplitAPUnionSets": 0.11712884902954102,
|
| 499 |
+
"SplitAccGrp": 0.0018432140350341797,
|
| 500 |
+
"StaticProfiler": 0.005769252777099609,
|
| 501 |
+
"StaticTransposeLocalTensor": 0.005156517028808594,
|
| 502 |
+
"SundaISel": 0.04348421096801758,
|
| 503 |
+
"TCTransform": 0.0010843276977539063,
|
| 504 |
+
"TensorInitialization": 0.03327298164367676,
|
| 505 |
+
"TensorOpSimplifier": 0.008444547653198242,
|
| 506 |
+
"TensorOpTransform": 0.025008678436279297,
|
| 507 |
+
"TileCCOps": 0.008762598037719727,
|
| 508 |
+
"TilingProfiler": 0.015444755554199219,
|
| 509 |
+
"TransformConvOp": 0.003448963165283203,
|
| 510 |
+
"TritiumFusion": 0.09676671028137207,
|
| 511 |
+
"ValueNumbering": 0.0035293102264404297,
|
| 512 |
+
"VectorizeDMA": 0.008908271789550781,
|
| 513 |
+
"VectorizeMatMult": 0.025291919708251953,
|
| 514 |
+
"WeightCoalescing": 0.00263214111328125,
|
| 515 |
+
"ZeroSizeTensorElimination": 0.00011610984802246094
|
| 516 |
+
},
|
| 517 |
+
"tensorizer": {
|
| 518 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 15106.0,
|
| 519 |
+
"StaticProfiler::AifUb": 201.88758850097656,
|
| 520 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 641.6123657226563,
|
| 521 |
+
"StaticProfiler::AverageDmaLength": 3126.786865234375,
|
| 522 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.9704360961914,
|
| 523 |
+
"StaticProfiler::AveragePartitionUtilization": 99.9277572631836,
|
| 524 |
+
"StaticProfiler::AveragePeUtilization": 99.87899780273438,
|
| 525 |
+
"StaticProfiler::DDRTransferBytes": 231802112.0,
|
| 526 |
+
"StaticProfiler::InternalTransferBytes": 335020032.0,
|
| 527 |
+
"StaticProfiler::LoadExpanded": 43652.0,
|
| 528 |
+
"StaticProfiler::LocalizationEfficiency": 317.8067321777344,
|
| 529 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 425.62677001953125,
|
| 530 |
+
"StaticProfiler::StoreExpanded": 24065.0,
|
| 531 |
+
"StaticProfiler::TotalDMAExpanded": 67717.0,
|
| 532 |
+
"StaticProfiler::TotalDynamicInstancesCount": 24368.0,
|
| 533 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 24356.0,
|
| 534 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
| 535 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
| 536 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
| 537 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
| 538 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
| 539 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
| 540 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 192.0,
|
| 541 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 7184.0,
|
| 542 |
+
"TilingProfiler::NumPfTransposes": 8.0,
|
| 543 |
+
"TilingProfiler::NumPfTransposesForIo": 0.0,
|
| 544 |
+
"TilingProfiler::NumPfTransposesForLocal": 6.0,
|
| 545 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
|
| 546 |
+
"TilingProfiler::PfTransposeInstructions": 5568.0,
|
| 547 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 0.0,
|
| 548 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 4800.0,
|
| 549 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
|
| 550 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
|
| 551 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 1635.0,
|
| 552 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
| 553 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
| 554 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
| 555 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
| 556 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
| 557 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
| 558 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
| 559 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
| 560 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
| 561 |
+
}
|
| 562 |
+
},
|
| 563 |
+
"sg0001": {
|
| 564 |
+
"compiletime": {
|
| 565 |
+
"AGOrderingAnalysisPass": 0.04393625259399414,
|
| 566 |
+
"AffinePredicateResolution": 0.001329183578491211,
|
| 567 |
+
"AliasDependencyElimination": 0.00010919570922851563,
|
| 568 |
+
"AliasDependencyInduction": 0.009439468383789063,
|
| 569 |
+
"AliasDependencyReset": 0.02581024169921875,
|
| 570 |
+
"BFComputeCutting": 0.0038251876831054688,
|
| 571 |
+
"BirCodeGenLoop": 0.10795426368713379,
|
| 572 |
+
"CCOpFusion": 0.05731463432312012,
|
| 573 |
+
"CanonicalizeDAGForPGTiling": 0.0033617019653320313,
|
| 574 |
+
"CanonicalizeIR": 0.0017633438110351563,
|
| 575 |
+
"CoalesceCCOp": 0.0020570755004882813,
|
| 576 |
+
"CommuteConcat": 0.0011479854583740234,
|
| 577 |
+
"DMALocalityOpt": 0.0014009475708007813,
|
| 578 |
+
"DMAProfiler": 0.0043773651123046875,
|
| 579 |
+
"DMATilingProfiler": 0.005214691162109375,
|
| 580 |
+
"DataLocalityOpt": 0.14134621620178223,
|
| 581 |
+
"DataStreaming": 0.006264209747314453,
|
| 582 |
+
"DeConcat": 0.0018208026885986328,
|
| 583 |
+
"DeadCodeElimination": 0.0017764568328857422,
|
| 584 |
+
"DeadStoreElimination": 0.03261542320251465,
|
| 585 |
+
"DelinearIndices": 0.011170148849487305,
|
| 586 |
+
"Delinearization": 0.006006479263305664,
|
| 587 |
+
"DoNothing": 7.104873657226563e-05,
|
| 588 |
+
"DramToDramTranspose": 0.002429485321044922,
|
| 589 |
+
"DumpGraphAndMetadata": 0.006948947906494141,
|
| 590 |
+
"EliminateDivs": 0.0039048194885253906,
|
| 591 |
+
"ExpandBatchNorm": 0.0020303726196289063,
|
| 592 |
+
"ExpandISAMacro": 0.0029213428497314453,
|
| 593 |
+
"FactorizeBlkDims": 0.028472423553466797,
|
| 594 |
+
"FactorizeThreadAxesInFreeDims": 0.002131938934326172,
|
| 595 |
+
"FlattenMacroLoop": 0.0029516220092773438,
|
| 596 |
+
"GenericAccessSimplifier": 0.0009126663208007813,
|
| 597 |
+
"InferInitValue": 0.045678138732910156,
|
| 598 |
+
"InferIntrinsicOnCC": 0.01059865951538086,
|
| 599 |
+
"InferNeuronTensor": 0.06629371643066406,
|
| 600 |
+
"InferNonlocalTensors": 0.03510117530822754,
|
| 601 |
+
"InferPSumTensor": 0.06450939178466797,
|
| 602 |
+
"InlineNativeKernels": 0.0018451213836669922,
|
| 603 |
+
"InsertIOTransposes": 0.026484012603759766,
|
| 604 |
+
"InsertLocalTransposes": 0.008344173431396484,
|
| 605 |
+
"InsertOffloadedTransposes": 0.0071446895599365234,
|
| 606 |
+
"LICM": 0.0036759376525878906,
|
| 607 |
+
"LateLegalizeInst": 0.004426479339599609,
|
| 608 |
+
"LateLegalizePostSplit": 0.0032634735107421875,
|
| 609 |
+
"LateLowerReshapeOp": 0.0013735294342041016,
|
| 610 |
+
"LateLowerTensorOp": 0.005624532699584961,
|
| 611 |
+
"LateNeuronInstComb": 0.017444849014282227,
|
| 612 |
+
"LayoutPreprocessing": 0.03893160820007324,
|
| 613 |
+
"LayoutPreprocessingAndAnalysis": 0.07227110862731934,
|
| 614 |
+
"LayoutRequirementAnalysis": 0.009693145751953125,
|
| 615 |
+
"LegalizeCCOpLayout": 0.0020418167114257813,
|
| 616 |
+
"LegalizeOpLevelAlias": 0.001138448715209961,
|
| 617 |
+
"LegalizePartitionReduce": 0.0015141963958740234,
|
| 618 |
+
"LegalizeSundaAccess": 0.019158363342285156,
|
| 619 |
+
"LegalizeSundaMacro": 0.01407480239868164,
|
| 620 |
+
"LegalizeType": 0.005761623382568359,
|
| 621 |
+
"LocalLayoutOpt": 0.039359331130981445,
|
| 622 |
+
"LoopFusion": 0.006695985794067383,
|
| 623 |
+
"LoopSplitting": 0.0003750324249267578,
|
| 624 |
+
"LowerBroadcast": 0.0017099380493164063,
|
| 625 |
+
"LowerCCOpBlockAxis": 0.006280422210693359,
|
| 626 |
+
"LowerComplexBroadcast": 0.0021283626556396484,
|
| 627 |
+
"LowerIntrinsics": 0.051717519760131836,
|
| 628 |
+
"LowerTensorOp": 0.012630462646484375,
|
| 629 |
+
"LowerTranspose": 0.018566608428955078,
|
| 630 |
+
"MacroGeneration": 0.10026049613952637,
|
| 631 |
+
"MaskPropagation": 0.003166675567626953,
|
| 632 |
+
"MemcpyElimination": 0.11932373046875,
|
| 633 |
+
"MutateDataType": 0.0012645721435546875,
|
| 634 |
+
"NeuronAliasDependencyInduction": 0.0003998279571533203,
|
| 635 |
+
"NeuronAliasDependencyReset": 0.012525796890258789,
|
| 636 |
+
"NeuronInstComb": 0.009615182876586914,
|
| 637 |
+
"NeuronLICM": 0.010987997055053711,
|
| 638 |
+
"NeuronLoopFusion": 0.02326488494873047,
|
| 639 |
+
"NeuronLoopInterchange": 0.002103090286254883,
|
| 640 |
+
"NeuronSimplifier": 0.014336109161376953,
|
| 641 |
+
"NeuronSimplifyPredicates": 0.008521318435668945,
|
| 642 |
+
"NeuronValueNumbering": 0.0033826828002929688,
|
| 643 |
+
"OptimizeAliasedCopyChain": 0.0006175041198730469,
|
| 644 |
+
"OptimizeNKIKernels": 0.0019922256469726563,
|
| 645 |
+
"PAGLayoutOpt": 0.4941246509552002,
|
| 646 |
+
"PComputeCutting": 0.007924079895019531,
|
| 647 |
+
"PGLayoutTilingPipeline": 0.9564568996429443,
|
| 648 |
+
"PGTiling": 0.1994781494140625,
|
| 649 |
+
"PadElimination": 0.00040078163146972656,
|
| 650 |
+
"ParAxesAnnotation": 0.4647810459136963,
|
| 651 |
+
"PartialLoopFusion": 0.022934675216674805,
|
| 652 |
+
"PartialSimdFusion": 0.0667269229888916,
|
| 653 |
+
"PerfectLoopNest": 0.002288341522216797,
|
| 654 |
+
"RecognizeOpIdiom": 0.004983663558959961,
|
| 655 |
+
"Recompute": 0.00026607513427734375,
|
| 656 |
+
"RelaxPredicates": 0.003928184509277344,
|
| 657 |
+
"Rematerialization": 0.0023522377014160156,
|
| 658 |
+
"ReshapeWeights": 0.0006775856018066406,
|
| 659 |
+
"ResolveAccessConflict": 0.006055116653442383,
|
| 660 |
+
"ResolveComplicatePredicates": 0.0013859272003173828,
|
| 661 |
+
"RewriteReplicationMatmul": 0.0016639232635498047,
|
| 662 |
+
"RewriteWeights": 0.0022728443145751953,
|
| 663 |
+
"SFKVectorizer": 0.2814767360687256,
|
| 664 |
+
"SimpleAllReduceTiling": 0.0020284652709960938,
|
| 665 |
+
"Simplifier": 0.0032384395599365234,
|
| 666 |
+
"SimplifyMacroPredicates": 0.0071146488189697266,
|
| 667 |
+
"SimplifyNeuronTensor": 0.014531373977661133,
|
| 668 |
+
"SimplifySlice": 0.0009911060333251953,
|
| 669 |
+
"SimplifyTensor": 0.009533405303955078,
|
| 670 |
+
"SpillPSum": 0.02969837188720703,
|
| 671 |
+
"SplitAPUnionSets": 0.03618764877319336,
|
| 672 |
+
"SplitAccGrp": 0.0015993118286132813,
|
| 673 |
+
"StaticProfiler": 0.004440784454345703,
|
| 674 |
+
"StaticTransposeLocalTensor": 0.0054094791412353516,
|
| 675 |
+
"SundaISel": 0.04047369956970215,
|
| 676 |
+
"TCTransform": 0.0010793209075927734,
|
| 677 |
+
"TensorInitialization": 0.007404804229736328,
|
| 678 |
+
"TensorOpSimplifier": 0.008566856384277344,
|
| 679 |
+
"TensorOpTransform": 0.03130841255187988,
|
| 680 |
+
"TileCCOps": 0.008295297622680664,
|
| 681 |
+
"TilingProfiler": 0.01665639877319336,
|
| 682 |
+
"TransformConvOp": 0.0034193992614746094,
|
| 683 |
+
"TritiumFusion": 0.13472461700439453,
|
| 684 |
+
"ValueNumbering": 0.002829313278198242,
|
| 685 |
+
"VectorizeDMA": 0.002307891845703125,
|
| 686 |
+
"VectorizeMatMult": 0.02826690673828125,
|
| 687 |
+
"WeightCoalescing": 0.0019905567169189453,
|
| 688 |
+
"ZeroSizeTensorElimination": 0.00034880638122558594
|
| 689 |
+
},
|
| 690 |
+
"tensorizer": {
|
| 691 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 34504.0,
|
| 692 |
+
"StaticProfiler::AifUb": 1543.150390625,
|
| 693 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 599.2274169921875,
|
| 694 |
+
"StaticProfiler::AverageDmaLength": 1330.70458984375,
|
| 695 |
+
"StaticProfiler::AverageFractalPeUtilization": 100.0,
|
| 696 |
+
"StaticProfiler::AveragePartitionUtilization": 99.83068084716797,
|
| 697 |
+
"StaticProfiler::AveragePeUtilization": 100.0,
|
| 698 |
+
"StaticProfiler::DDRTransferBytes": 769721344.0,
|
| 699 |
+
"StaticProfiler::InternalTransferBytes": 414711808.0,
|
| 700 |
+
"StaticProfiler::LoadExpanded": 545409.0,
|
| 701 |
+
"StaticProfiler::LocalizationEfficiency": 38.8314323425293,
|
| 702 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 42.54035186767578,
|
| 703 |
+
"StaticProfiler::StoreExpanded": 30721.0,
|
| 704 |
+
"StaticProfiler::TotalDMAExpanded": 576130.0,
|
| 705 |
+
"StaticProfiler::TotalDynamicInstancesCount": 47251.0,
|
| 706 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 47251.0,
|
| 707 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
| 708 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
| 709 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
| 710 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
| 711 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
| 712 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
| 713 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 128.0,
|
| 714 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 25600.0,
|
| 715 |
+
"TilingProfiler::NumPfTransposes": 9.0,
|
| 716 |
+
"TilingProfiler::NumPfTransposesForIo": 3.0,
|
| 717 |
+
"TilingProfiler::NumPfTransposesForLocal": 4.0,
|
| 718 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
|
| 719 |
+
"TilingProfiler::PfTransposeInstructions": 5856.0,
|
| 720 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 544.0,
|
| 721 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 4288.0,
|
| 722 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
|
| 723 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
|
| 724 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 1923.0,
|
| 725 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
| 726 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
| 727 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
| 728 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
| 729 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
| 730 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
| 731 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
| 732 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
| 733 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
| 734 |
+
}
|
| 735 |
+
},
|
| 736 |
+
"sg0002": {
|
| 737 |
+
"compiletime": {
|
| 738 |
+
"AGOrderingAnalysisPass": 0.07080268859863281,
|
| 739 |
+
"AffinePredicateResolution": 0.001844644546508789,
|
| 740 |
+
"AliasDependencyElimination": 0.0001308917999267578,
|
| 741 |
+
"AliasDependencyInduction": 0.012178182601928711,
|
| 742 |
+
"AliasDependencyReset": 0.027022123336791992,
|
| 743 |
+
"BFComputeCutting": 0.0061855316162109375,
|
| 744 |
+
"BirCodeGenLoop": 0.17315936088562012,
|
| 745 |
+
"CCOpFusion": 0.08119010925292969,
|
| 746 |
+
"CanonicalizeDAGForPGTiling": 0.0066263675689697266,
|
| 747 |
+
"CanonicalizeIR": 0.0033893585205078125,
|
| 748 |
+
"CoalesceCCOp": 0.0032279491424560547,
|
| 749 |
+
"CommuteConcat": 0.0016355514526367188,
|
| 750 |
+
"DMALocalityOpt": 0.0021250247955322266,
|
| 751 |
+
"DMAProfiler": 0.005837917327880859,
|
| 752 |
+
"DMATilingProfiler": 0.010099172592163086,
|
| 753 |
+
"DataLocalityOpt": 0.25125575065612793,
|
| 754 |
+
"DataStreaming": 0.010326147079467773,
|
| 755 |
+
"DeConcat": 0.002583742141723633,
|
| 756 |
+
"DeadCodeElimination": 0.0018777847290039063,
|
| 757 |
+
"DeadStoreElimination": 0.05162811279296875,
|
| 758 |
+
"DelinearIndices": 0.017117977142333984,
|
| 759 |
+
"Delinearization": 0.006100893020629883,
|
| 760 |
+
"DoNothing": 6.961822509765625e-05,
|
| 761 |
+
"DramToDramTranspose": 0.0047817230224609375,
|
| 762 |
+
"DumpGraphAndMetadata": 0.008226871490478516,
|
| 763 |
+
"EliminateDivs": 0.005579710006713867,
|
| 764 |
+
"ExpandBatchNorm": 0.0024263858795166016,
|
| 765 |
+
"ExpandISAMacro": 0.004798412322998047,
|
| 766 |
+
"FactorizeBlkDims": 0.059967756271362305,
|
| 767 |
+
"FactorizeThreadAxesInFreeDims": 0.003553628921508789,
|
| 768 |
+
"FlattenMacroLoop": 0.0051920413970947266,
|
| 769 |
+
"GenericAccessSimplifier": 0.0013842582702636719,
|
| 770 |
+
"InferInitValue": 0.07938385009765625,
|
| 771 |
+
"InferIntrinsicOnCC": 0.0170440673828125,
|
| 772 |
+
"InferNeuronTensor": 0.09969878196716309,
|
| 773 |
+
"InferNonlocalTensors": 0.11206626892089844,
|
| 774 |
+
"InferPSumTensor": 0.0999910831451416,
|
| 775 |
+
"InlineNativeKernels": 0.003079652786254883,
|
| 776 |
+
"InsertIOTransposes": 0.031575918197631836,
|
| 777 |
+
"InsertLocalTransposes": 0.01450800895690918,
|
| 778 |
+
"InsertOffloadedTransposes": 0.010621309280395508,
|
| 779 |
+
"LICM": 0.006058454513549805,
|
| 780 |
+
"LateLegalizeInst": 0.009308576583862305,
|
| 781 |
+
"LateLegalizePostSplit": 0.005577564239501953,
|
| 782 |
+
"LateLowerReshapeOp": 0.002005338668823242,
|
| 783 |
+
"LateLowerTensorOp": 0.006224155426025391,
|
| 784 |
+
"LateNeuronInstComb": 0.026279211044311523,
|
| 785 |
+
"LayoutPreprocessing": 0.045662879943847656,
|
| 786 |
+
"LayoutPreprocessingAndAnalysis": 0.08894896507263184,
|
| 787 |
+
"LayoutRequirementAnalysis": 0.014644384384155273,
|
| 788 |
+
"LegalizeCCOpLayout": 0.002945423126220703,
|
| 789 |
+
"LegalizeOpLevelAlias": 0.0015463829040527344,
|
| 790 |
+
"LegalizePartitionReduce": 0.0025038719177246094,
|
| 791 |
+
"LegalizeSundaAccess": 0.05164527893066406,
|
| 792 |
+
"LegalizeSundaMacro": 0.02343463897705078,
|
| 793 |
+
"LegalizeType": 0.007515668869018555,
|
| 794 |
+
"LocalLayoutOpt": 0.05471658706665039,
|
| 795 |
+
"LoopFusion": 0.009645700454711914,
|
| 796 |
+
"LoopSplitting": 0.0005736351013183594,
|
| 797 |
+
"LowerBroadcast": 0.0030760765075683594,
|
| 798 |
+
"LowerCCOpBlockAxis": 0.010100364685058594,
|
| 799 |
+
"LowerComplexBroadcast": 0.004259347915649414,
|
| 800 |
+
"LowerIntrinsics": 0.071380615234375,
|
| 801 |
+
"LowerTensorOp": 0.017409563064575195,
|
| 802 |
+
"LowerTranspose": 2.716614007949829,
|
| 803 |
+
"MacroGeneration": 0.15842843055725098,
|
| 804 |
+
"MaskPropagation": 0.004798412322998047,
|
| 805 |
+
"MemcpyElimination": 0.14069795608520508,
|
| 806 |
+
"MutateDataType": 0.0018687248229980469,
|
| 807 |
+
"NeuronAliasDependencyInduction": 0.0005295276641845703,
|
| 808 |
+
"NeuronAliasDependencyReset": 0.014295816421508789,
|
| 809 |
+
"NeuronInstComb": 0.014310121536254883,
|
| 810 |
+
"NeuronLICM": 0.01824188232421875,
|
| 811 |
+
"NeuronLoopFusion": 0.03763270378112793,
|
| 812 |
+
"NeuronLoopInterchange": 0.0033299922943115234,
|
| 813 |
+
"NeuronSimplifier": 0.02371072769165039,
|
| 814 |
+
"NeuronSimplifyPredicates": 0.010996103286743164,
|
| 815 |
+
"NeuronValueNumbering": 0.005862236022949219,
|
| 816 |
+
"OptimizeAliasedCopyChain": 0.0014340877532958984,
|
| 817 |
+
"OptimizeNKIKernels": 0.0033910274505615234,
|
| 818 |
+
"PAGLayoutOpt": 1.3955655097961426,
|
| 819 |
+
"PComputeCutting": 0.01343226432800293,
|
| 820 |
+
"PGLayoutTilingPipeline": 2.056190013885498,
|
| 821 |
+
"PGTiling": 0.29619383811950684,
|
| 822 |
+
"PadElimination": 0.000553131103515625,
|
| 823 |
+
"ParAxesAnnotation": 1.3579421043395996,
|
| 824 |
+
"PartialLoopFusion": 0.04784822463989258,
|
| 825 |
+
"PartialSimdFusion": 0.1002810001373291,
|
| 826 |
+
"PerfectLoopNest": 0.0037620067596435547,
|
| 827 |
+
"RecognizeOpIdiom": 0.0069119930267333984,
|
| 828 |
+
"Recompute": 0.0003383159637451172,
|
| 829 |
+
"RelaxPredicates": 0.04035329818725586,
|
| 830 |
+
"Rematerialization": 0.003230571746826172,
|
| 831 |
+
"ReshapeWeights": 0.0011525154113769531,
|
| 832 |
+
"ResolveAccessConflict": 0.0065386295318603516,
|
| 833 |
+
"ResolveComplicatePredicates": 0.002877473831176758,
|
| 834 |
+
"RewriteReplicationMatmul": 0.0025200843811035156,
|
| 835 |
+
"RewriteWeights": 0.0038268566131591797,
|
| 836 |
+
"SFKVectorizer": 0.48886895179748535,
|
| 837 |
+
"SimpleAllReduceTiling": 0.0031387805938720703,
|
| 838 |
+
"Simplifier": 0.004804134368896484,
|
| 839 |
+
"SimplifyMacroPredicates": 0.01790642738342285,
|
| 840 |
+
"SimplifyNeuronTensor": 0.020508527755737305,
|
| 841 |
+
"SimplifySlice": 0.00145721435546875,
|
| 842 |
+
"SimplifyTensor": 0.016368389129638672,
|
| 843 |
+
"SpillPSum": 0.049539804458618164,
|
| 844 |
+
"SplitAPUnionSets": 0.060128211975097656,
|
| 845 |
+
"SplitAccGrp": 0.0025734901428222656,
|
| 846 |
+
"StaticProfiler": 0.006608724594116211,
|
| 847 |
+
"StaticTransposeLocalTensor": 0.008615732192993164,
|
| 848 |
+
"SundaISel": 0.06819963455200195,
|
| 849 |
+
"TCTransform": 0.0016434192657470703,
|
| 850 |
+
"TensorInitialization": 0.013004541397094727,
|
| 851 |
+
"TensorOpSimplifier": 0.011576175689697266,
|
| 852 |
+
"TensorOpTransform": 0.04517507553100586,
|
| 853 |
+
"TileCCOps": 0.011648893356323242,
|
| 854 |
+
"TilingProfiler": 0.02406597137451172,
|
| 855 |
+
"TransformConvOp": 0.004629850387573242,
|
| 856 |
+
"TritiumFusion": 0.26013898849487305,
|
| 857 |
+
"ValueNumbering": 0.004456520080566406,
|
| 858 |
+
"VectorizeDMA": 0.009630918502807617,
|
| 859 |
+
"VectorizeMatMult": 0.046350955963134766,
|
| 860 |
+
"WeightCoalescing": 0.0030286312103271484,
|
| 861 |
+
"ZeroSizeTensorElimination": 0.00014090538024902344
|
| 862 |
+
},
|
| 863 |
+
"tensorizer": {
|
| 864 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 95441.0,
|
| 865 |
+
"StaticProfiler::AifUb": 832.489990234375,
|
| 866 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 434.669921875,
|
| 867 |
+
"StaticProfiler::AverageDmaLength": 1623.94287109375,
|
| 868 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.99919128417969,
|
| 869 |
+
"StaticProfiler::AveragePartitionUtilization": 99.9390869140625,
|
| 870 |
+
"StaticProfiler::AveragePeUtilization": 99.99919128417969,
|
| 871 |
+
"StaticProfiler::DDRTransferBytes": 1990484992.0,
|
| 872 |
+
"StaticProfiler::InternalTransferBytes": 1161470464.0,
|
| 873 |
+
"StaticProfiler::LoadExpanded": 1195909.0,
|
| 874 |
+
"StaticProfiler::LocalizationEfficiency": 52.21323013305664,
|
| 875 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 56.50444793701172,
|
| 876 |
+
"StaticProfiler::StoreExpanded": 28288.0,
|
| 877 |
+
"StaticProfiler::TotalDMAExpanded": 1224197.0,
|
| 878 |
+
"StaticProfiler::TotalDynamicInstancesCount": 120578.0,
|
| 879 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 120578.0,
|
| 880 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
| 881 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
| 882 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
| 883 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
| 884 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
| 885 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
| 886 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 129.0,
|
| 887 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 63040.0,
|
| 888 |
+
"TilingProfiler::NumPfTransposes": 13.0,
|
| 889 |
+
"TilingProfiler::NumPfTransposesForIo": 3.0,
|
| 890 |
+
"TilingProfiler::NumPfTransposesForLocal": 8.0,
|
| 891 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
|
| 892 |
+
"TilingProfiler::PfTransposeInstructions": 25889.0,
|
| 893 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 19040.0,
|
| 894 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 5825.0,
|
| 895 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
|
| 896 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 8.0,
|
| 897 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 3225.0,
|
| 898 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
| 899 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
| 900 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
| 901 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
| 902 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
| 903 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
| 904 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
| 905 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
| 906 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
| 907 |
+
}
|
| 908 |
+
},
|
| 909 |
+
"sg01": {
|
| 910 |
+
"compiletime": {
|
| 911 |
+
"CanonicalizeConv": 3.000000106112566e-06,
|
| 912 |
+
"CanonicalizeForTensorizer": 1.2999999853491317e-05,
|
| 913 |
+
"Canonicalizer": 0.00024300000222865492,
|
| 914 |
+
"HoistCompute": 1.9999999949504854e-06,
|
| 915 |
+
"IdentifyCrossPassTensors": 1.1000000085914508e-05,
|
| 916 |
+
"MemcastMotion": 6.000000212225132e-06,
|
| 917 |
+
"PenguinizeFunctions": 1.2000000424450263e-05,
|
| 918 |
+
"PruneFunctions": 9.999999747378752e-06,
|
| 919 |
+
"RemoveOptimizationBarriers": 1.1000000085914508e-05,
|
| 920 |
+
"ScatterMotion": 0.0,
|
| 921 |
+
"TensorizerLegalizationPass": 1.2999999853491317e-05,
|
| 922 |
+
"VerifySupportedOps": 1.2000000424450263e-05,
|
| 923 |
+
"algsimp": 5.900000178371556e-05,
|
| 924 |
+
"batchnorm_expander": 1.2000000424450263e-05,
|
| 925 |
+
"boundary-marker-removal": 3.999999989900971e-06,
|
| 926 |
+
"call-inliner": 9.000000318337698e-06,
|
| 927 |
+
"canonicalize-boundary-marker": 4.999999873689376e-06,
|
| 928 |
+
"collective-stream-id-checker": 3.000000106112566e-06,
|
| 929 |
+
"comparison-expander": 3.999999989900971e-06,
|
| 930 |
+
"computation-deduplicator": 1.5999999959603883e-05,
|
| 931 |
+
"config-lowering": 2.2000000171829015e-05,
|
| 932 |
+
"constant_folding": 7.999999979801942e-06,
|
| 933 |
+
"cse": 2.9999999242136255e-05,
|
| 934 |
+
"dce": 9.999999974752427e-07,
|
| 935 |
+
"dynamic-slice-transpose": 3.999999989900971e-06,
|
| 936 |
+
"eliminate-redundant-compare": 3.999999989900971e-06,
|
| 937 |
+
"emit-offloaded-dropout": 1.2999999853491317e-05,
|
| 938 |
+
"flatten-call-graph": 7.999999979801942e-06,
|
| 939 |
+
"fuse-send-recv": 1.8999999156221747e-05,
|
| 940 |
+
"hilo-conditional-to-select": 4.999999873689376e-06,
|
| 941 |
+
"hilo::LegalizeAlias": 3.999999989900971e-06,
|
| 942 |
+
"hilo::NeuronInstCombine": 4.600000102072954e-05,
|
| 943 |
+
"hilo::NeuronOpFusion": 1.8000000636675395e-05,
|
| 944 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 1.1000000085914508e-05,
|
| 945 |
+
"hilo::ScheduleFusion": 7.000000096013537e-06,
|
| 946 |
+
"hilo::SixtyFourHack": 1.2000000424450263e-05,
|
| 947 |
+
"hilo::VerifyAliasing": 1.9999999949504854e-06,
|
| 948 |
+
"hlo-mac-count": 1.9999999494757503e-05,
|
| 949 |
+
"legalize-ccops-for-tensorizer": 9.999999974752427e-07,
|
| 950 |
+
"legalize-compare": 3.999999989900971e-06,
|
| 951 |
+
"lower-argminmax-custom-call": 3.999999989900971e-06,
|
| 952 |
+
"map-inline": 1.1000000085914508e-05,
|
| 953 |
+
"metadata-naming": 1.700000029813964e-05,
|
| 954 |
+
"mlir::detail::OpToOpPassAdaptor": 0.0006070000235922635,
|
| 955 |
+
"mlir::hlo::MhloToPyPenguin": 0.0010010000551119447,
|
| 956 |
+
"mlir::mhlo::LowerComplexExtraPass": 8.099999831756577e-05,
|
| 957 |
+
"mlir::mhlo::LowerComplexPass": 0.00011500000255182385,
|
| 958 |
+
"native-to-custom-softmax": 4.999999873689376e-06,
|
| 959 |
+
"native-to-custom-softmax-dx": 9.999999747378752e-06,
|
| 960 |
+
"neuron-hlo-verifier": 0.0003809999907389283,
|
| 961 |
+
"operand_upcaster": 1.2000000424450263e-05,
|
| 962 |
+
"post-par-pipe-begin": 9.999999974752427e-07,
|
| 963 |
+
"post-par-pipe-end": 0.0,
|
| 964 |
+
"post-partition-simplification": 0.0004670000053010881,
|
| 965 |
+
"replace-minimum-constant": 7.000000096013537e-06,
|
| 966 |
+
"reshape-mover": 3.000000106112566e-06,
|
| 967 |
+
"simplify-concat": 5.0999999075429514e-05,
|
| 968 |
+
"simplify-while-loops": 1.9999999949504854e-06,
|
| 969 |
+
"transform-variadic-reduce": 9.000000318337698e-06,
|
| 970 |
+
"tuple-simplifier": 4.999999873689376e-06,
|
| 971 |
+
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
|
| 972 |
+
"unroll-while-loop": 0.0
|
| 973 |
+
},
|
| 974 |
+
"hilo": {
|
| 975 |
+
"ArithmeticIntensity": 1360.724365234375,
|
| 976 |
+
"HloMacCount": 214748364800.0,
|
| 977 |
+
"Traffic": 315638304.0
|
| 978 |
+
}
|
| 979 |
+
},
|
| 980 |
+
"sg02": {
|
| 981 |
+
"compiletime": {
|
| 982 |
+
"CanonicalizeConv": 7.699999696342275e-05,
|
| 983 |
+
"CanonicalizeForTensorizer": 1.5999999959603883e-05,
|
| 984 |
+
"Canonicalizer": 0.00037900000461377203,
|
| 985 |
+
"HoistCompute": 0.0,
|
| 986 |
+
"IdentifyCrossPassTensors": 1.2000000424450263e-05,
|
| 987 |
+
"MemcastMotion": 0.0,
|
| 988 |
+
"PenguinizeFunctions": 1.1000000085914508e-05,
|
| 989 |
+
"PruneFunctions": 7.999999979801942e-06,
|
| 990 |
+
"RemoveOptimizationBarriers": 1.2000000424450263e-05,
|
| 991 |
+
"ScatterMotion": 1.1000000085914508e-05,
|
| 992 |
+
"TensorizerLegalizationPass": 9.000000318337698e-06,
|
| 993 |
+
"VerifySupportedOps": 1.700000029813964e-05,
|
| 994 |
+
"algsimp": 8.70000003487803e-05,
|
| 995 |
+
"batchnorm_expander": 1.700000029813964e-05,
|
| 996 |
+
"boundary-marker-removal": 6.000000212225132e-06,
|
| 997 |
+
"call-inliner": 1.2999999853491317e-05,
|
| 998 |
+
"canonicalize-boundary-marker": 7.999999979801942e-06,
|
| 999 |
+
"collective-stream-id-checker": 3.000000106112566e-06,
|
| 1000 |
+
"comparison-expander": 7.999999979801942e-06,
|
| 1001 |
+
"computation-deduplicator": 3.199999991920777e-05,
|
| 1002 |
+
"config-lowering": 3.400000059627928e-05,
|
| 1003 |
+
"constant_folding": 1.1000000085914508e-05,
|
| 1004 |
+
"cse": 1.700000029813964e-05,
|
| 1005 |
+
"dce": 9.999999974752427e-07,
|
| 1006 |
+
"dynamic-slice-transpose": 4.999999873689376e-06,
|
| 1007 |
+
"eliminate-redundant-compare": 6.000000212225132e-06,
|
| 1008 |
+
"emit-offloaded-dropout": 1.5999999959603883e-05,
|
| 1009 |
+
"flatten-call-graph": 1.2000000424450263e-05,
|
| 1010 |
+
"fuse-send-recv": 2.700000004551839e-05,
|
| 1011 |
+
"hilo-conditional-to-select": 7.000000096013537e-06,
|
| 1012 |
+
"hilo::LegalizeAlias": 4.999999873689376e-06,
|
| 1013 |
+
"hilo::NeuronInstCombine": 3.9999998989515007e-05,
|
| 1014 |
+
"hilo::NeuronOpFusion": 0.0,
|
| 1015 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 9.000000318337698e-06,
|
| 1016 |
+
"hilo::ScheduleFusion": 1.9999999949504854e-06,
|
| 1017 |
+
"hilo::SixtyFourHack": 1.2000000424450263e-05,
|
| 1018 |
+
"hilo::VerifyAliasing": 1.9999999949504854e-06,
|
| 1019 |
+
"hlo-mac-count": 3.199999991920777e-05,
|
| 1020 |
+
"legalize-ccops-for-tensorizer": 9.999999974752427e-07,
|
| 1021 |
+
"legalize-compare": 4.999999873689376e-06,
|
| 1022 |
+
"lower-argminmax-custom-call": 4.999999873689376e-06,
|
| 1023 |
+
"map-inline": 1.5999999959603883e-05,
|
| 1024 |
+
"metadata-naming": 2.300000051036477e-05,
|
| 1025 |
+
"mlir::detail::OpToOpPassAdaptor": 1.8000000636675395e-05,
|
| 1026 |
+
"mlir::hlo::MhloToPyPenguin": 0.0014220000011846423,
|
| 1027 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.00012799999967683107,
|
| 1028 |
+
"mlir::mhlo::LowerComplexPass": 0.00019500000053085387,
|
| 1029 |
+
"native-to-custom-softmax": 9.000000318337698e-06,
|
| 1030 |
+
"native-to-custom-softmax-dx": 1.8999999156221747e-05,
|
| 1031 |
+
"neuron-hlo-verifier": 0.000534999999217689,
|
| 1032 |
+
"operand_upcaster": 1.700000029813964e-05,
|
| 1033 |
+
"post-par-pipe-begin": 1.9999999949504854e-06,
|
| 1034 |
+
"post-par-pipe-end": 0.0,
|
| 1035 |
+
"post-partition-simplification": 0.0006779999821446836,
|
| 1036 |
+
"replace-minimum-constant": 9.999999747378752e-06,
|
| 1037 |
+
"reshape-mover": 3.999999989900971e-06,
|
| 1038 |
+
"simplify-concat": 9.40000027185306e-05,
|
| 1039 |
+
"simplify-while-loops": 3.000000106112566e-06,
|
| 1040 |
+
"transform-variadic-reduce": 5.2999999752501026e-05,
|
| 1041 |
+
"tuple-simplifier": 7.000000096013537e-06,
|
| 1042 |
+
"unpack-nested-aws-ntwsr": 6.000000212225132e-06,
|
| 1043 |
+
"unroll-while-loop": 0.0
|
| 1044 |
+
},
|
| 1045 |
+
"hilo": {
|
| 1046 |
+
"ArithmeticIntensity": 688.8796997070313,
|
| 1047 |
+
"HloMacCount": 369678352384.0,
|
| 1048 |
+
"Traffic": 1073274048.0
|
| 1049 |
+
}
|
| 1050 |
+
}
|
| 1051 |
+
}
|
context_encoding_model/_tp0_bk0/graph.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a2f5ea2ec513d4c246461496be1df6a5408a8b32e0e3d5e8c000252b38c4eb0
|
| 3 |
+
size 4557824
|
context_encoding_model/_tp0_bk0/log-neuron-cc.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
context_encoding_model/_tp0_bk0/metaneff.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:979a3ce4ea645eb08474f1ced61267e507be927f3b1d9d6e3b8e280bf5249638
|
| 3 |
+
size 804702
|
context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8a1a7494d891cfc6c1e0596bf5b2088f381187ae6f1ab618b3f2610e62f323e
|
| 3 |
+
size 889460
|
context_encoding_model/_tp0_bk0/model.MODULE_e80578c547275f02c0fa+ed72d204.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a2f5ea2ec513d4c246461496be1df6a5408a8b32e0e3d5e8c000252b38c4eb0
|
| 3 |
+
size 4557824
|
layout_opt/command.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
neuronx-cc compile graph.hlo --framework XLA --target trn1 --output graph.neff --model-type=transformer -O1 --lnc=1 '--internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=true' --logfile=log-neuron-cc.txt --verbose=35
|
layout_opt/graph.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ee6e68f9bde84f2d463d428631187d977b7c48d6aab9ea708b3ee5b5fcd5d4c
|
| 3 |
+
size 7343104
|
layout_opt/log-neuron-cc.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
layout_opt/metaneff
ADDED
|
@@ -0,0 +1,982 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
(
|
| 3 |
+
input0�� �2embed_tokens.weight8
|
| 4 |
+
;
|
| 5 |
+
input1� �2'layers.0.self_attn.o_proj.o_proj.weight8
|
| 6 |
+
;
|
| 7 |
+
input2�� 2'layers.0.self_attn.qkv_proj.Wqkv.weight8
|
| 8 |
+
1
|
| 9 |
+
input3� 2layers.0.input_layernorm.weight8
|
| 10 |
+
7
|
| 11 |
+
input4�2%layers.0.self_attn.k_layernorm.weight8
|
| 12 |
+
7
|
| 13 |
+
input5�2%layers.0.self_attn.q_layernorm.weight8
|
| 14 |
+
1
|
| 15 |
+
input6� �02layers.0.mlp.down_proj.weight8
|
| 16 |
+
/
|
| 17 |
+
input7�0� 2layers.0.mlp.up_proj.weight8
|
| 18 |
+
:
|
| 19 |
+
input8� 2(layers.0.post_attention_layernorm.weight8
|
| 20 |
+
1
|
| 21 |
+
input9�0� 2layers.0.mlp.gate_proj.weight8
|
| 22 |
+
<
|
| 23 |
+
input10� �2'layers.1.self_attn.o_proj.o_proj.weight8
|
| 24 |
+
<
|
| 25 |
+
input11�� 2'layers.1.self_attn.qkv_proj.Wqkv.weight8
|
| 26 |
+
2
|
| 27 |
+
input12� 2layers.1.input_layernorm.weight8
|
| 28 |
+
8
|
| 29 |
+
input13�2%layers.1.self_attn.k_layernorm.weight8
|
| 30 |
+
8
|
| 31 |
+
input14�2%layers.1.self_attn.q_layernorm.weight8
|
| 32 |
+
2
|
| 33 |
+
input15� �02layers.1.mlp.down_proj.weight8
|
| 34 |
+
0
|
| 35 |
+
input16�0� 2layers.1.mlp.up_proj.weight8
|
| 36 |
+
;
|
| 37 |
+
input17� 2(layers.1.post_attention_layernorm.weight8
|
| 38 |
+
2
|
| 39 |
+
input18�0� 2layers.1.mlp.gate_proj.weight8
|
| 40 |
+
<
|
| 41 |
+
input19� �2'layers.2.self_attn.o_proj.o_proj.weight8
|
| 42 |
+
<
|
| 43 |
+
input20�� 2'layers.2.self_attn.qkv_proj.Wqkv.weight8
|
| 44 |
+
2
|
| 45 |
+
input21� 2layers.2.input_layernorm.weight8
|
| 46 |
+
8
|
| 47 |
+
input22�2%layers.2.self_attn.k_layernorm.weight8
|
| 48 |
+
8
|
| 49 |
+
input23�2%layers.2.self_attn.q_layernorm.weight8
|
| 50 |
+
2
|
| 51 |
+
input24� �02layers.2.mlp.down_proj.weight8
|
| 52 |
+
0
|
| 53 |
+
input25�0� 2layers.2.mlp.up_proj.weight8
|
| 54 |
+
;
|
| 55 |
+
input26� 2(layers.2.post_attention_layernorm.weight8
|
| 56 |
+
2
|
| 57 |
+
input27�0� 2layers.2.mlp.gate_proj.weight8
|
| 58 |
+
<
|
| 59 |
+
input28� �2'layers.3.self_attn.o_proj.o_proj.weight8
|
| 60 |
+
<
|
| 61 |
+
input29�� 2'layers.3.self_attn.qkv_proj.Wqkv.weight8
|
| 62 |
+
2
|
| 63 |
+
input30� 2layers.3.input_layernorm.weight8
|
| 64 |
+
8
|
| 65 |
+
input31�2%layers.3.self_attn.k_layernorm.weight8
|
| 66 |
+
8
|
| 67 |
+
input32�2%layers.3.self_attn.q_layernorm.weight8
|
| 68 |
+
2
|
| 69 |
+
input33� �02layers.3.mlp.down_proj.weight8
|
| 70 |
+
0
|
| 71 |
+
input34�0� 2layers.3.mlp.up_proj.weight8
|
| 72 |
+
;
|
| 73 |
+
input35� 2(layers.3.post_attention_layernorm.weight8
|
| 74 |
+
2
|
| 75 |
+
input36�0� 2layers.3.mlp.gate_proj.weight8
|
| 76 |
+
<
|
| 77 |
+
input37� �2'layers.4.self_attn.o_proj.o_proj.weight8
|
| 78 |
+
<
|
| 79 |
+
input38�� 2'layers.4.self_attn.qkv_proj.Wqkv.weight8
|
| 80 |
+
2
|
| 81 |
+
input39� 2layers.4.input_layernorm.weight8
|
| 82 |
+
8
|
| 83 |
+
input40�2%layers.4.self_attn.k_layernorm.weight8
|
| 84 |
+
8
|
| 85 |
+
input41�2%layers.4.self_attn.q_layernorm.weight8
|
| 86 |
+
2
|
| 87 |
+
input42� �02layers.4.mlp.down_proj.weight8
|
| 88 |
+
0
|
| 89 |
+
input43�0� 2layers.4.mlp.up_proj.weight8
|
| 90 |
+
;
|
| 91 |
+
input44� 2(layers.4.post_attention_layernorm.weight8
|
| 92 |
+
2
|
| 93 |
+
input45�0� 2layers.4.mlp.gate_proj.weight8
|
| 94 |
+
<
|
| 95 |
+
input46� �2'layers.5.self_attn.o_proj.o_proj.weight8
|
| 96 |
+
<
|
| 97 |
+
input47�� 2'layers.5.self_attn.qkv_proj.Wqkv.weight8
|
| 98 |
+
2
|
| 99 |
+
input48� 2layers.5.input_layernorm.weight8
|
| 100 |
+
8
|
| 101 |
+
input49�2%layers.5.self_attn.k_layernorm.weight8
|
| 102 |
+
8
|
| 103 |
+
input50�2%layers.5.self_attn.q_layernorm.weight8
|
| 104 |
+
2
|
| 105 |
+
input51� �02layers.5.mlp.down_proj.weight8
|
| 106 |
+
0
|
| 107 |
+
input52�0� 2layers.5.mlp.up_proj.weight8
|
| 108 |
+
;
|
| 109 |
+
input53� 2(layers.5.post_attention_layernorm.weight8
|
| 110 |
+
2
|
| 111 |
+
input54�0� 2layers.5.mlp.gate_proj.weight8
|
| 112 |
+
<
|
| 113 |
+
input55� �2'layers.6.self_attn.o_proj.o_proj.weight8
|
| 114 |
+
<
|
| 115 |
+
input56�� 2'layers.6.self_attn.qkv_proj.Wqkv.weight8
|
| 116 |
+
2
|
| 117 |
+
input57� 2layers.6.input_layernorm.weight8
|
| 118 |
+
8
|
| 119 |
+
input58�2%layers.6.self_attn.k_layernorm.weight8
|
| 120 |
+
8
|
| 121 |
+
input59�2%layers.6.self_attn.q_layernorm.weight8
|
| 122 |
+
2
|
| 123 |
+
input60� �02layers.6.mlp.down_proj.weight8
|
| 124 |
+
0
|
| 125 |
+
input61�0� 2layers.6.mlp.up_proj.weight8
|
| 126 |
+
;
|
| 127 |
+
input62� 2(layers.6.post_attention_layernorm.weight8
|
| 128 |
+
2
|
| 129 |
+
input63�0� 2layers.6.mlp.gate_proj.weight8
|
| 130 |
+
<
|
| 131 |
+
input64� �2'layers.7.self_attn.o_proj.o_proj.weight8
|
| 132 |
+
<
|
| 133 |
+
input65�� 2'layers.7.self_attn.qkv_proj.Wqkv.weight8
|
| 134 |
+
2
|
| 135 |
+
input66� 2layers.7.input_layernorm.weight8
|
| 136 |
+
8
|
| 137 |
+
input67�2%layers.7.self_attn.k_layernorm.weight8
|
| 138 |
+
8
|
| 139 |
+
input68�2%layers.7.self_attn.q_layernorm.weight8
|
| 140 |
+
2
|
| 141 |
+
input69� �02layers.7.mlp.down_proj.weight8
|
| 142 |
+
0
|
| 143 |
+
input70�0� 2layers.7.mlp.up_proj.weight8
|
| 144 |
+
;
|
| 145 |
+
input71� 2(layers.7.post_attention_layernorm.weight8
|
| 146 |
+
2
|
| 147 |
+
input72�0� 2layers.7.mlp.gate_proj.weight8
|
| 148 |
+
<
|
| 149 |
+
input73� �2'layers.8.self_attn.o_proj.o_proj.weight8
|
| 150 |
+
<
|
| 151 |
+
input74�� 2'layers.8.self_attn.qkv_proj.Wqkv.weight8
|
| 152 |
+
2
|
| 153 |
+
input75� 2layers.8.input_layernorm.weight8
|
| 154 |
+
8
|
| 155 |
+
input76�2%layers.8.self_attn.k_layernorm.weight8
|
| 156 |
+
8
|
| 157 |
+
input77�2%layers.8.self_attn.q_layernorm.weight8
|
| 158 |
+
2
|
| 159 |
+
input78� �02layers.8.mlp.down_proj.weight8
|
| 160 |
+
0
|
| 161 |
+
input79�0� 2layers.8.mlp.up_proj.weight8
|
| 162 |
+
;
|
| 163 |
+
input80� 2(layers.8.post_attention_layernorm.weight8
|
| 164 |
+
2
|
| 165 |
+
input81�0� 2layers.8.mlp.gate_proj.weight8
|
| 166 |
+
<
|
| 167 |
+
input82� �2'layers.9.self_attn.o_proj.o_proj.weight8
|
| 168 |
+
<
|
| 169 |
+
input83�� 2'layers.9.self_attn.qkv_proj.Wqkv.weight8
|
| 170 |
+
2
|
| 171 |
+
input84� 2layers.9.input_layernorm.weight8
|
| 172 |
+
8
|
| 173 |
+
input85�2%layers.9.self_attn.k_layernorm.weight8
|
| 174 |
+
8
|
| 175 |
+
input86�2%layers.9.self_attn.q_layernorm.weight8
|
| 176 |
+
2
|
| 177 |
+
input87� �02layers.9.mlp.down_proj.weight8
|
| 178 |
+
0
|
| 179 |
+
input88�0� 2layers.9.mlp.up_proj.weight8
|
| 180 |
+
;
|
| 181 |
+
input89� 2(layers.9.post_attention_layernorm.weight8
|
| 182 |
+
2
|
| 183 |
+
input90�0� 2layers.9.mlp.gate_proj.weight8
|
| 184 |
+
=
|
| 185 |
+
input91� �2(layers.10.self_attn.o_proj.o_proj.weight8
|
| 186 |
+
=
|
| 187 |
+
input92�� 2(layers.10.self_attn.qkv_proj.Wqkv.weight8
|
| 188 |
+
3
|
| 189 |
+
input93� 2 layers.10.input_layernorm.weight8
|
| 190 |
+
9
|
| 191 |
+
input94�2&layers.10.self_attn.k_layernorm.weight8
|
| 192 |
+
9
|
| 193 |
+
input95�2&layers.10.self_attn.q_layernorm.weight8
|
| 194 |
+
3
|
| 195 |
+
input96� �02layers.10.mlp.down_proj.weight8
|
| 196 |
+
1
|
| 197 |
+
input97�0� 2layers.10.mlp.up_proj.weight8
|
| 198 |
+
<
|
| 199 |
+
input98� 2)layers.10.post_attention_layernorm.weight8
|
| 200 |
+
3
|
| 201 |
+
input99�0� 2layers.10.mlp.gate_proj.weight8
|
| 202 |
+
>
|
| 203 |
+
input100� �2(layers.11.self_attn.o_proj.o_proj.weight8
|
| 204 |
+
>
|
| 205 |
+
input101�� 2(layers.11.self_attn.qkv_proj.Wqkv.weight8
|
| 206 |
+
4
|
| 207 |
+
input102� 2 layers.11.input_layernorm.weight8
|
| 208 |
+
:
|
| 209 |
+
input103�2&layers.11.self_attn.k_layernorm.weight8
|
| 210 |
+
:
|
| 211 |
+
input104�2&layers.11.self_attn.q_layernorm.weight8
|
| 212 |
+
4
|
| 213 |
+
input105� �02layers.11.mlp.down_proj.weight8
|
| 214 |
+
2
|
| 215 |
+
input106�0� 2layers.11.mlp.up_proj.weight8
|
| 216 |
+
=
|
| 217 |
+
input107� 2)layers.11.post_attention_layernorm.weight8
|
| 218 |
+
4
|
| 219 |
+
input108�0� 2layers.11.mlp.gate_proj.weight8
|
| 220 |
+
>
|
| 221 |
+
input109� �2(layers.12.self_attn.o_proj.o_proj.weight8
|
| 222 |
+
>
|
| 223 |
+
input110�� 2(layers.12.self_attn.qkv_proj.Wqkv.weight8
|
| 224 |
+
4
|
| 225 |
+
input111� 2 layers.12.input_layernorm.weight8
|
| 226 |
+
:
|
| 227 |
+
input112�2&layers.12.self_attn.k_layernorm.weight8
|
| 228 |
+
:
|
| 229 |
+
input113�2&layers.12.self_attn.q_layernorm.weight8
|
| 230 |
+
4
|
| 231 |
+
input114� �02layers.12.mlp.down_proj.weight8
|
| 232 |
+
2
|
| 233 |
+
input115�0� 2layers.12.mlp.up_proj.weight8
|
| 234 |
+
=
|
| 235 |
+
input116� 2)layers.12.post_attention_layernorm.weight8
|
| 236 |
+
4
|
| 237 |
+
input117�0� 2layers.12.mlp.gate_proj.weight8
|
| 238 |
+
>
|
| 239 |
+
input118� �2(layers.13.self_attn.o_proj.o_proj.weight8
|
| 240 |
+
>
|
| 241 |
+
input119�� 2(layers.13.self_attn.qkv_proj.Wqkv.weight8
|
| 242 |
+
4
|
| 243 |
+
input120� 2 layers.13.input_layernorm.weight8
|
| 244 |
+
:
|
| 245 |
+
input121�2&layers.13.self_attn.k_layernorm.weight8
|
| 246 |
+
:
|
| 247 |
+
input122�2&layers.13.self_attn.q_layernorm.weight8
|
| 248 |
+
4
|
| 249 |
+
input123� �02layers.13.mlp.down_proj.weight8
|
| 250 |
+
2
|
| 251 |
+
input124�0� 2layers.13.mlp.up_proj.weight8
|
| 252 |
+
=
|
| 253 |
+
input125� 2)layers.13.post_attention_layernorm.weight8
|
| 254 |
+
4
|
| 255 |
+
input126�0� 2layers.13.mlp.gate_proj.weight8
|
| 256 |
+
>
|
| 257 |
+
input127� �2(layers.14.self_attn.o_proj.o_proj.weight8
|
| 258 |
+
>
|
| 259 |
+
input128�� 2(layers.14.self_attn.qkv_proj.Wqkv.weight8
|
| 260 |
+
4
|
| 261 |
+
input129� 2 layers.14.input_layernorm.weight8
|
| 262 |
+
:
|
| 263 |
+
input130�2&layers.14.self_attn.k_layernorm.weight8
|
| 264 |
+
:
|
| 265 |
+
input131�2&layers.14.self_attn.q_layernorm.weight8
|
| 266 |
+
4
|
| 267 |
+
input132� �02layers.14.mlp.down_proj.weight8
|
| 268 |
+
2
|
| 269 |
+
input133�0� 2layers.14.mlp.up_proj.weight8
|
| 270 |
+
=
|
| 271 |
+
input134� 2)layers.14.post_attention_layernorm.weight8
|
| 272 |
+
4
|
| 273 |
+
input135�0� 2layers.14.mlp.gate_proj.weight8
|
| 274 |
+
>
|
| 275 |
+
input136� �2(layers.15.self_attn.o_proj.o_proj.weight8
|
| 276 |
+
>
|
| 277 |
+
input137�� 2(layers.15.self_attn.qkv_proj.Wqkv.weight8
|
| 278 |
+
4
|
| 279 |
+
input138� 2 layers.15.input_layernorm.weight8
|
| 280 |
+
:
|
| 281 |
+
input139�2&layers.15.self_attn.k_layernorm.weight8
|
| 282 |
+
:
|
| 283 |
+
input140�2&layers.15.self_attn.q_layernorm.weight8
|
| 284 |
+
4
|
| 285 |
+
input141� �02layers.15.mlp.down_proj.weight8
|
| 286 |
+
2
|
| 287 |
+
input142�0� 2layers.15.mlp.up_proj.weight8
|
| 288 |
+
=
|
| 289 |
+
input143� 2)layers.15.post_attention_layernorm.weight8
|
| 290 |
+
4
|
| 291 |
+
input144�0� 2layers.15.mlp.gate_proj.weight8
|
| 292 |
+
>
|
| 293 |
+
input145� �2(layers.16.self_attn.o_proj.o_proj.weight8
|
| 294 |
+
>
|
| 295 |
+
input146�� 2(layers.16.self_attn.qkv_proj.Wqkv.weight8
|
| 296 |
+
4
|
| 297 |
+
input147� 2 layers.16.input_layernorm.weight8
|
| 298 |
+
:
|
| 299 |
+
input148�2&layers.16.self_attn.k_layernorm.weight8
|
| 300 |
+
:
|
| 301 |
+
input149�2&layers.16.self_attn.q_layernorm.weight8
|
| 302 |
+
4
|
| 303 |
+
input150� �02layers.16.mlp.down_proj.weight8
|
| 304 |
+
2
|
| 305 |
+
input151�0� 2layers.16.mlp.up_proj.weight8
|
| 306 |
+
=
|
| 307 |
+
input152� 2)layers.16.post_attention_layernorm.weight8
|
| 308 |
+
4
|
| 309 |
+
input153�0� 2layers.16.mlp.gate_proj.weight8
|
| 310 |
+
>
|
| 311 |
+
input154� �2(layers.17.self_attn.o_proj.o_proj.weight8
|
| 312 |
+
>
|
| 313 |
+
input155�� 2(layers.17.self_attn.qkv_proj.Wqkv.weight8
|
| 314 |
+
4
|
| 315 |
+
input156� 2 layers.17.input_layernorm.weight8
|
| 316 |
+
:
|
| 317 |
+
input157�2&layers.17.self_attn.k_layernorm.weight8
|
| 318 |
+
:
|
| 319 |
+
input158�2&layers.17.self_attn.q_layernorm.weight8
|
| 320 |
+
4
|
| 321 |
+
input159� �02layers.17.mlp.down_proj.weight8
|
| 322 |
+
2
|
| 323 |
+
input160�0� 2layers.17.mlp.up_proj.weight8
|
| 324 |
+
=
|
| 325 |
+
input161� 2)layers.17.post_attention_layernorm.weight8
|
| 326 |
+
4
|
| 327 |
+
input162�0� 2layers.17.mlp.gate_proj.weight8
|
| 328 |
+
>
|
| 329 |
+
input163� �2(layers.18.self_attn.o_proj.o_proj.weight8
|
| 330 |
+
>
|
| 331 |
+
input164�� 2(layers.18.self_attn.qkv_proj.Wqkv.weight8
|
| 332 |
+
4
|
| 333 |
+
input165� 2 layers.18.input_layernorm.weight8
|
| 334 |
+
:
|
| 335 |
+
input166�2&layers.18.self_attn.k_layernorm.weight8
|
| 336 |
+
:
|
| 337 |
+
input167�2&layers.18.self_attn.q_layernorm.weight8
|
| 338 |
+
4
|
| 339 |
+
input168� �02layers.18.mlp.down_proj.weight8
|
| 340 |
+
2
|
| 341 |
+
input169�0� 2layers.18.mlp.up_proj.weight8
|
| 342 |
+
=
|
| 343 |
+
input170� 2)layers.18.post_attention_layernorm.weight8
|
| 344 |
+
4
|
| 345 |
+
input171�0� 2layers.18.mlp.gate_proj.weight8
|
| 346 |
+
>
|
| 347 |
+
input172� �2(layers.19.self_attn.o_proj.o_proj.weight8
|
| 348 |
+
>
|
| 349 |
+
input173�� 2(layers.19.self_attn.qkv_proj.Wqkv.weight8
|
| 350 |
+
4
|
| 351 |
+
input174� 2 layers.19.input_layernorm.weight8
|
| 352 |
+
:
|
| 353 |
+
input175�2&layers.19.self_attn.k_layernorm.weight8
|
| 354 |
+
:
|
| 355 |
+
input176�2&layers.19.self_attn.q_layernorm.weight8
|
| 356 |
+
4
|
| 357 |
+
input177� �02layers.19.mlp.down_proj.weight8
|
| 358 |
+
2
|
| 359 |
+
input178�0� 2layers.19.mlp.up_proj.weight8
|
| 360 |
+
=
|
| 361 |
+
input179� 2)layers.19.post_attention_layernorm.weight8
|
| 362 |
+
4
|
| 363 |
+
input180�0� 2layers.19.mlp.gate_proj.weight8
|
| 364 |
+
>
|
| 365 |
+
input181� �2(layers.20.self_attn.o_proj.o_proj.weight8
|
| 366 |
+
>
|
| 367 |
+
input182�� 2(layers.20.self_attn.qkv_proj.Wqkv.weight8
|
| 368 |
+
4
|
| 369 |
+
input183� 2 layers.20.input_layernorm.weight8
|
| 370 |
+
:
|
| 371 |
+
input184�2&layers.20.self_attn.k_layernorm.weight8
|
| 372 |
+
:
|
| 373 |
+
input185�2&layers.20.self_attn.q_layernorm.weight8
|
| 374 |
+
4
|
| 375 |
+
input186� �02layers.20.mlp.down_proj.weight8
|
| 376 |
+
2
|
| 377 |
+
input187�0� 2layers.20.mlp.up_proj.weight8
|
| 378 |
+
=
|
| 379 |
+
input188� 2)layers.20.post_attention_layernorm.weight8
|
| 380 |
+
4
|
| 381 |
+
input189�0� 2layers.20.mlp.gate_proj.weight8
|
| 382 |
+
>
|
| 383 |
+
input190� �2(layers.21.self_attn.o_proj.o_proj.weight8
|
| 384 |
+
>
|
| 385 |
+
input191�� 2(layers.21.self_attn.qkv_proj.Wqkv.weight8
|
| 386 |
+
4
|
| 387 |
+
input192� 2 layers.21.input_layernorm.weight8
|
| 388 |
+
:
|
| 389 |
+
input193�2&layers.21.self_attn.k_layernorm.weight8
|
| 390 |
+
:
|
| 391 |
+
input194�2&layers.21.self_attn.q_layernorm.weight8
|
| 392 |
+
4
|
| 393 |
+
input195� �02layers.21.mlp.down_proj.weight8
|
| 394 |
+
2
|
| 395 |
+
input196�0� 2layers.21.mlp.up_proj.weight8
|
| 396 |
+
=
|
| 397 |
+
input197� 2)layers.21.post_attention_layernorm.weight8
|
| 398 |
+
4
|
| 399 |
+
input198�0� 2layers.21.mlp.gate_proj.weight8
|
| 400 |
+
>
|
| 401 |
+
input199� �2(layers.22.self_attn.o_proj.o_proj.weight8
|
| 402 |
+
>
|
| 403 |
+
input200�� 2(layers.22.self_attn.qkv_proj.Wqkv.weight8
|
| 404 |
+
4
|
| 405 |
+
input201� 2 layers.22.input_layernorm.weight8
|
| 406 |
+
:
|
| 407 |
+
input202�2&layers.22.self_attn.k_layernorm.weight8
|
| 408 |
+
:
|
| 409 |
+
input203�2&layers.22.self_attn.q_layernorm.weight8
|
| 410 |
+
4
|
| 411 |
+
input204� �02layers.22.mlp.down_proj.weight8
|
| 412 |
+
2
|
| 413 |
+
input205�0� 2layers.22.mlp.up_proj.weight8
|
| 414 |
+
=
|
| 415 |
+
input206� 2)layers.22.post_attention_layernorm.weight8
|
| 416 |
+
4
|
| 417 |
+
input207�0� 2layers.22.mlp.gate_proj.weight8
|
| 418 |
+
>
|
| 419 |
+
input208� �2(layers.23.self_attn.o_proj.o_proj.weight8
|
| 420 |
+
>
|
| 421 |
+
input209�� 2(layers.23.self_attn.qkv_proj.Wqkv.weight8
|
| 422 |
+
4
|
| 423 |
+
input210� 2 layers.23.input_layernorm.weight8
|
| 424 |
+
:
|
| 425 |
+
input211�2&layers.23.self_attn.k_layernorm.weight8
|
| 426 |
+
:
|
| 427 |
+
input212�2&layers.23.self_attn.q_layernorm.weight8
|
| 428 |
+
4
|
| 429 |
+
input213� �02layers.23.mlp.down_proj.weight8
|
| 430 |
+
2
|
| 431 |
+
input214�0� 2layers.23.mlp.up_proj.weight8
|
| 432 |
+
=
|
| 433 |
+
input215� 2)layers.23.post_attention_layernorm.weight8
|
| 434 |
+
4
|
| 435 |
+
input216�0� 2layers.23.mlp.gate_proj.weight8
|
| 436 |
+
>
|
| 437 |
+
input217� �2(layers.24.self_attn.o_proj.o_proj.weight8
|
| 438 |
+
>
|
| 439 |
+
input218�� 2(layers.24.self_attn.qkv_proj.Wqkv.weight8
|
| 440 |
+
4
|
| 441 |
+
input219� 2 layers.24.input_layernorm.weight8
|
| 442 |
+
:
|
| 443 |
+
input220�2&layers.24.self_attn.k_layernorm.weight8
|
| 444 |
+
:
|
| 445 |
+
input221�2&layers.24.self_attn.q_layernorm.weight8
|
| 446 |
+
4
|
| 447 |
+
input222� �02layers.24.mlp.down_proj.weight8
|
| 448 |
+
2
|
| 449 |
+
input223�0� 2layers.24.mlp.up_proj.weight8
|
| 450 |
+
=
|
| 451 |
+
input224� 2)layers.24.post_attention_layernorm.weight8
|
| 452 |
+
4
|
| 453 |
+
input225�0� 2layers.24.mlp.gate_proj.weight8
|
| 454 |
+
>
|
| 455 |
+
input226� �2(layers.25.self_attn.o_proj.o_proj.weight8
|
| 456 |
+
>
|
| 457 |
+
input227�� 2(layers.25.self_attn.qkv_proj.Wqkv.weight8
|
| 458 |
+
4
|
| 459 |
+
input228� 2 layers.25.input_layernorm.weight8
|
| 460 |
+
:
|
| 461 |
+
input229�2&layers.25.self_attn.k_layernorm.weight8
|
| 462 |
+
:
|
| 463 |
+
input230�2&layers.25.self_attn.q_layernorm.weight8
|
| 464 |
+
4
|
| 465 |
+
input231� �02layers.25.mlp.down_proj.weight8
|
| 466 |
+
2
|
| 467 |
+
input232�0� 2layers.25.mlp.up_proj.weight8
|
| 468 |
+
=
|
| 469 |
+
input233� 2)layers.25.post_attention_layernorm.weight8
|
| 470 |
+
4
|
| 471 |
+
input234�0� 2layers.25.mlp.gate_proj.weight8
|
| 472 |
+
>
|
| 473 |
+
input235� �2(layers.26.self_attn.o_proj.o_proj.weight8
|
| 474 |
+
>
|
| 475 |
+
input236�� 2(layers.26.self_attn.qkv_proj.Wqkv.weight8
|
| 476 |
+
4
|
| 477 |
+
input237� 2 layers.26.input_layernorm.weight8
|
| 478 |
+
:
|
| 479 |
+
input238�2&layers.26.self_attn.k_layernorm.weight8
|
| 480 |
+
:
|
| 481 |
+
input239�2&layers.26.self_attn.q_layernorm.weight8
|
| 482 |
+
4
|
| 483 |
+
input240� �02layers.26.mlp.down_proj.weight8
|
| 484 |
+
2
|
| 485 |
+
input241�0� 2layers.26.mlp.up_proj.weight8
|
| 486 |
+
=
|
| 487 |
+
input242� 2)layers.26.post_attention_layernorm.weight8
|
| 488 |
+
4
|
| 489 |
+
input243�0� 2layers.26.mlp.gate_proj.weight8
|
| 490 |
+
>
|
| 491 |
+
input244� �2(layers.27.self_attn.o_proj.o_proj.weight8
|
| 492 |
+
>
|
| 493 |
+
input245�� 2(layers.27.self_attn.qkv_proj.Wqkv.weight8
|
| 494 |
+
4
|
| 495 |
+
input246� 2 layers.27.input_layernorm.weight8
|
| 496 |
+
:
|
| 497 |
+
input247�2&layers.27.self_attn.k_layernorm.weight8
|
| 498 |
+
:
|
| 499 |
+
input248�2&layers.27.self_attn.q_layernorm.weight8
|
| 500 |
+
4
|
| 501 |
+
input249� �02layers.27.mlp.down_proj.weight8
|
| 502 |
+
2
|
| 503 |
+
input250�0� 2layers.27.mlp.up_proj.weight8
|
| 504 |
+
=
|
| 505 |
+
input251� 2)layers.27.post_attention_layernorm.weight8
|
| 506 |
+
4
|
| 507 |
+
input252�0� 2layers.27.mlp.gate_proj.weight8
|
| 508 |
+
>
|
| 509 |
+
input253� �2(layers.28.self_attn.o_proj.o_proj.weight8
|
| 510 |
+
>
|
| 511 |
+
input254�� 2(layers.28.self_attn.qkv_proj.Wqkv.weight8
|
| 512 |
+
4
|
| 513 |
+
input255� 2 layers.28.input_layernorm.weight8
|
| 514 |
+
:
|
| 515 |
+
input256�2&layers.28.self_attn.k_layernorm.weight8
|
| 516 |
+
:
|
| 517 |
+
input257�2&layers.28.self_attn.q_layernorm.weight8
|
| 518 |
+
4
|
| 519 |
+
input258� �02layers.28.mlp.down_proj.weight8
|
| 520 |
+
2
|
| 521 |
+
input259�0� 2layers.28.mlp.up_proj.weight8
|
| 522 |
+
=
|
| 523 |
+
input260� 2)layers.28.post_attention_layernorm.weight8
|
| 524 |
+
4
|
| 525 |
+
input261�0� 2layers.28.mlp.gate_proj.weight8
|
| 526 |
+
>
|
| 527 |
+
input262� �2(layers.29.self_attn.o_proj.o_proj.weight8
|
| 528 |
+
>
|
| 529 |
+
input263�� 2(layers.29.self_attn.qkv_proj.Wqkv.weight8
|
| 530 |
+
4
|
| 531 |
+
input264� 2 layers.29.input_layernorm.weight8
|
| 532 |
+
:
|
| 533 |
+
input265�2&layers.29.self_attn.k_layernorm.weight8
|
| 534 |
+
:
|
| 535 |
+
input266�2&layers.29.self_attn.q_layernorm.weight8
|
| 536 |
+
4
|
| 537 |
+
input267� �02layers.29.mlp.down_proj.weight8
|
| 538 |
+
2
|
| 539 |
+
input268�0� 2layers.29.mlp.up_proj.weight8
|
| 540 |
+
=
|
| 541 |
+
input269� 2)layers.29.post_attention_layernorm.weight8
|
| 542 |
+
4
|
| 543 |
+
input270�0� 2layers.29.mlp.gate_proj.weight8
|
| 544 |
+
>
|
| 545 |
+
input271� �2(layers.30.self_attn.o_proj.o_proj.weight8
|
| 546 |
+
>
|
| 547 |
+
input272�� 2(layers.30.self_attn.qkv_proj.Wqkv.weight8
|
| 548 |
+
4
|
| 549 |
+
input273� 2 layers.30.input_layernorm.weight8
|
| 550 |
+
:
|
| 551 |
+
input274�2&layers.30.self_attn.k_layernorm.weight8
|
| 552 |
+
:
|
| 553 |
+
input275�2&layers.30.self_attn.q_layernorm.weight8
|
| 554 |
+
4
|
| 555 |
+
input276� �02layers.30.mlp.down_proj.weight8
|
| 556 |
+
2
|
| 557 |
+
input277�0� 2layers.30.mlp.up_proj.weight8
|
| 558 |
+
=
|
| 559 |
+
input278� 2)layers.30.post_attention_layernorm.weight8
|
| 560 |
+
4
|
| 561 |
+
input279�0� 2layers.30.mlp.gate_proj.weight8
|
| 562 |
+
>
|
| 563 |
+
input280� �2(layers.31.self_attn.o_proj.o_proj.weight8
|
| 564 |
+
>
|
| 565 |
+
input281�� 2(layers.31.self_attn.qkv_proj.Wqkv.weight8
|
| 566 |
+
4
|
| 567 |
+
input282� 2 layers.31.input_layernorm.weight8
|
| 568 |
+
:
|
| 569 |
+
input283�2&layers.31.self_attn.k_layernorm.weight8
|
| 570 |
+
:
|
| 571 |
+
input284�2&layers.31.self_attn.q_layernorm.weight8
|
| 572 |
+
4
|
| 573 |
+
input285� �02layers.31.mlp.down_proj.weight8
|
| 574 |
+
2
|
| 575 |
+
input286�0� 2layers.31.mlp.up_proj.weight8
|
| 576 |
+
=
|
| 577 |
+
input287� 2)layers.31.post_attention_layernorm.weight8
|
| 578 |
+
4
|
| 579 |
+
input288�0� 2layers.31.mlp.gate_proj.weight8
|
| 580 |
+
>
|
| 581 |
+
input289� �2(layers.32.self_attn.o_proj.o_proj.weight8
|
| 582 |
+
>
|
| 583 |
+
input290�� 2(layers.32.self_attn.qkv_proj.Wqkv.weight8
|
| 584 |
+
4
|
| 585 |
+
input291� 2 layers.32.input_layernorm.weight8
|
| 586 |
+
:
|
| 587 |
+
input292�2&layers.32.self_attn.k_layernorm.weight8
|
| 588 |
+
:
|
| 589 |
+
input293�2&layers.32.self_attn.q_layernorm.weight8
|
| 590 |
+
4
|
| 591 |
+
input294� �02layers.32.mlp.down_proj.weight8
|
| 592 |
+
2
|
| 593 |
+
input295�0� 2layers.32.mlp.up_proj.weight8
|
| 594 |
+
=
|
| 595 |
+
input296� 2)layers.32.post_attention_layernorm.weight8
|
| 596 |
+
4
|
| 597 |
+
input297�0� 2layers.32.mlp.gate_proj.weight8
|
| 598 |
+
>
|
| 599 |
+
input298� �2(layers.33.self_attn.o_proj.o_proj.weight8
|
| 600 |
+
>
|
| 601 |
+
input299�� 2(layers.33.self_attn.qkv_proj.Wqkv.weight8
|
| 602 |
+
4
|
| 603 |
+
input300� 2 layers.33.input_layernorm.weight8
|
| 604 |
+
:
|
| 605 |
+
input301�2&layers.33.self_attn.k_layernorm.weight8
|
| 606 |
+
:
|
| 607 |
+
input302�2&layers.33.self_attn.q_layernorm.weight8
|
| 608 |
+
4
|
| 609 |
+
input303� �02layers.33.mlp.down_proj.weight8
|
| 610 |
+
2
|
| 611 |
+
input304�0� 2layers.33.mlp.up_proj.weight8
|
| 612 |
+
=
|
| 613 |
+
input305� 2)layers.33.post_attention_layernorm.weight8
|
| 614 |
+
4
|
| 615 |
+
input306�0� 2layers.33.mlp.gate_proj.weight8
|
| 616 |
+
>
|
| 617 |
+
input307� �2(layers.34.self_attn.o_proj.o_proj.weight8
|
| 618 |
+
>
|
| 619 |
+
input308�� 2(layers.34.self_attn.qkv_proj.Wqkv.weight8
|
| 620 |
+
4
|
| 621 |
+
input309� 2 layers.34.input_layernorm.weight8
|
| 622 |
+
:
|
| 623 |
+
input310�2&layers.34.self_attn.k_layernorm.weight8
|
| 624 |
+
:
|
| 625 |
+
input311�2&layers.34.self_attn.q_layernorm.weight8
|
| 626 |
+
4
|
| 627 |
+
input312� �02layers.34.mlp.down_proj.weight8
|
| 628 |
+
2
|
| 629 |
+
input313�0� 2layers.34.mlp.up_proj.weight8
|
| 630 |
+
=
|
| 631 |
+
input314� 2)layers.34.post_attention_layernorm.weight8
|
| 632 |
+
4
|
| 633 |
+
input315�0� 2layers.34.mlp.gate_proj.weight8
|
| 634 |
+
>
|
| 635 |
+
input316� �2(layers.35.self_attn.o_proj.o_proj.weight8
|
| 636 |
+
>
|
| 637 |
+
input317�� 2(layers.35.self_attn.qkv_proj.Wqkv.weight8
|
| 638 |
+
4
|
| 639 |
+
input318� 2 layers.35.input_layernorm.weight8
|
| 640 |
+
:
|
| 641 |
+
input319�2&layers.35.self_attn.k_layernorm.weight8
|
| 642 |
+
:
|
| 643 |
+
input320�2&layers.35.self_attn.q_layernorm.weight8
|
| 644 |
+
4
|
| 645 |
+
input321� �02layers.35.mlp.down_proj.weight8
|
| 646 |
+
2
|
| 647 |
+
input322�0� 2layers.35.mlp.up_proj.weight8
|
| 648 |
+
=
|
| 649 |
+
input323� 2)layers.35.post_attention_layernorm.weight8
|
| 650 |
+
4
|
| 651 |
+
input324�0� 2layers.35.mlp.gate_proj.weight8
|
| 652 |
+
%
|
| 653 |
+
input325��� 2lm_head.weight8
|
| 654 |
+
|
| 655 |
+
input326� 2norm.weight8'
|
| 656 |
+
output0�� �2embed_tokens.weight:
|
| 657 |
+
output1� �2'layers.0.self_attn.o_proj.o_proj.weight:
|
| 658 |
+
output2�� 2'layers.0.self_attn.qkv_proj.Wqkv.weight0
|
| 659 |
+
output3� 2layers.0.input_layernorm.weight6
|
| 660 |
+
output4�2%layers.0.self_attn.k_layernorm.weight6
|
| 661 |
+
output5�2%layers.0.self_attn.q_layernorm.weight0
|
| 662 |
+
output6� �02layers.0.mlp.down_proj.weight.
|
| 663 |
+
output7�0� 2layers.0.mlp.up_proj.weight9
|
| 664 |
+
output8� 2(layers.0.post_attention_layernorm.weight0
|
| 665 |
+
output9�0� 2layers.0.mlp.gate_proj.weight;
|
| 666 |
+
output10� �2'layers.1.self_attn.o_proj.o_proj.weight;
|
| 667 |
+
output11�� 2'layers.1.self_attn.qkv_proj.Wqkv.weight1
|
| 668 |
+
output12� 2layers.1.input_layernorm.weight7
|
| 669 |
+
output13�2%layers.1.self_attn.k_layernorm.weight7
|
| 670 |
+
output14�2%layers.1.self_attn.q_layernorm.weight1
|
| 671 |
+
output15� �02layers.1.mlp.down_proj.weight/
|
| 672 |
+
output16�0� 2layers.1.mlp.up_proj.weight:
|
| 673 |
+
output17� 2(layers.1.post_attention_layernorm.weight1
|
| 674 |
+
output18�0� 2layers.1.mlp.gate_proj.weight;
|
| 675 |
+
output19� �2'layers.2.self_attn.o_proj.o_proj.weight;
|
| 676 |
+
output20�� 2'layers.2.self_attn.qkv_proj.Wqkv.weight1
|
| 677 |
+
output21� 2layers.2.input_layernorm.weight7
|
| 678 |
+
output22�2%layers.2.self_attn.k_layernorm.weight7
|
| 679 |
+
output23�2%layers.2.self_attn.q_layernorm.weight1
|
| 680 |
+
output24� �02layers.2.mlp.down_proj.weight/
|
| 681 |
+
output25�0� 2layers.2.mlp.up_proj.weight:
|
| 682 |
+
output26� 2(layers.2.post_attention_layernorm.weight1
|
| 683 |
+
output27�0� 2layers.2.mlp.gate_proj.weight;
|
| 684 |
+
output28� �2'layers.3.self_attn.o_proj.o_proj.weight;
|
| 685 |
+
output29�� 2'layers.3.self_attn.qkv_proj.Wqkv.weight1
|
| 686 |
+
output30� 2layers.3.input_layernorm.weight7
|
| 687 |
+
output31�2%layers.3.self_attn.k_layernorm.weight7
|
| 688 |
+
output32�2%layers.3.self_attn.q_layernorm.weight1
|
| 689 |
+
output33� �02layers.3.mlp.down_proj.weight/
|
| 690 |
+
output34�0� 2layers.3.mlp.up_proj.weight:
|
| 691 |
+
output35� 2(layers.3.post_attention_layernorm.weight1
|
| 692 |
+
output36�0� 2layers.3.mlp.gate_proj.weight;
|
| 693 |
+
output37� �2'layers.4.self_attn.o_proj.o_proj.weight;
|
| 694 |
+
output38�� 2'layers.4.self_attn.qkv_proj.Wqkv.weight1
|
| 695 |
+
output39� 2layers.4.input_layernorm.weight7
|
| 696 |
+
output40�2%layers.4.self_attn.k_layernorm.weight7
|
| 697 |
+
output41�2%layers.4.self_attn.q_layernorm.weight1
|
| 698 |
+
output42� �02layers.4.mlp.down_proj.weight/
|
| 699 |
+
output43�0� 2layers.4.mlp.up_proj.weight:
|
| 700 |
+
output44� 2(layers.4.post_attention_layernorm.weight1
|
| 701 |
+
output45�0� 2layers.4.mlp.gate_proj.weight;
|
| 702 |
+
output46� �2'layers.5.self_attn.o_proj.o_proj.weight;
|
| 703 |
+
output47�� 2'layers.5.self_attn.qkv_proj.Wqkv.weight1
|
| 704 |
+
output48� 2layers.5.input_layernorm.weight7
|
| 705 |
+
output49�2%layers.5.self_attn.k_layernorm.weight7
|
| 706 |
+
output50�2%layers.5.self_attn.q_layernorm.weight1
|
| 707 |
+
output51� �02layers.5.mlp.down_proj.weight/
|
| 708 |
+
output52�0� 2layers.5.mlp.up_proj.weight:
|
| 709 |
+
output53� 2(layers.5.post_attention_layernorm.weight1
|
| 710 |
+
output54�0� 2layers.5.mlp.gate_proj.weight;
|
| 711 |
+
output55� �2'layers.6.self_attn.o_proj.o_proj.weight;
|
| 712 |
+
output56�� 2'layers.6.self_attn.qkv_proj.Wqkv.weight1
|
| 713 |
+
output57� 2layers.6.input_layernorm.weight7
|
| 714 |
+
output58�2%layers.6.self_attn.k_layernorm.weight7
|
| 715 |
+
output59�2%layers.6.self_attn.q_layernorm.weight1
|
| 716 |
+
output60� �02layers.6.mlp.down_proj.weight/
|
| 717 |
+
output61�0� 2layers.6.mlp.up_proj.weight:
|
| 718 |
+
output62� 2(layers.6.post_attention_layernorm.weight1
|
| 719 |
+
output63�0� 2layers.6.mlp.gate_proj.weight;
|
| 720 |
+
output64� �2'layers.7.self_attn.o_proj.o_proj.weight;
|
| 721 |
+
output65�� 2'layers.7.self_attn.qkv_proj.Wqkv.weight1
|
| 722 |
+
output66� 2layers.7.input_layernorm.weight7
|
| 723 |
+
output67�2%layers.7.self_attn.k_layernorm.weight7
|
| 724 |
+
output68�2%layers.7.self_attn.q_layernorm.weight1
|
| 725 |
+
output69� �02layers.7.mlp.down_proj.weight/
|
| 726 |
+
output70�0� 2layers.7.mlp.up_proj.weight:
|
| 727 |
+
output71� 2(layers.7.post_attention_layernorm.weight1
|
| 728 |
+
output72�0� 2layers.7.mlp.gate_proj.weight;
|
| 729 |
+
output73� �2'layers.8.self_attn.o_proj.o_proj.weight;
|
| 730 |
+
output74�� 2'layers.8.self_attn.qkv_proj.Wqkv.weight1
|
| 731 |
+
output75� 2layers.8.input_layernorm.weight7
|
| 732 |
+
output76�2%layers.8.self_attn.k_layernorm.weight7
|
| 733 |
+
output77�2%layers.8.self_attn.q_layernorm.weight1
|
| 734 |
+
output78� �02layers.8.mlp.down_proj.weight/
|
| 735 |
+
output79�0� 2layers.8.mlp.up_proj.weight:
|
| 736 |
+
output80� 2(layers.8.post_attention_layernorm.weight1
|
| 737 |
+
output81�0� 2layers.8.mlp.gate_proj.weight;
|
| 738 |
+
output82� �2'layers.9.self_attn.o_proj.o_proj.weight;
|
| 739 |
+
output83�� 2'layers.9.self_attn.qkv_proj.Wqkv.weight1
|
| 740 |
+
output84� 2layers.9.input_layernorm.weight7
|
| 741 |
+
output85�2%layers.9.self_attn.k_layernorm.weight7
|
| 742 |
+
output86�2%layers.9.self_attn.q_layernorm.weight1
|
| 743 |
+
output87� �02layers.9.mlp.down_proj.weight/
|
| 744 |
+
output88�0� 2layers.9.mlp.up_proj.weight:
|
| 745 |
+
output89� 2(layers.9.post_attention_layernorm.weight1
|
| 746 |
+
output90�0� 2layers.9.mlp.gate_proj.weight<
|
| 747 |
+
output91� �2(layers.10.self_attn.o_proj.o_proj.weight<
|
| 748 |
+
output92�� 2(layers.10.self_attn.qkv_proj.Wqkv.weight2
|
| 749 |
+
output93� 2 layers.10.input_layernorm.weight8
|
| 750 |
+
output94�2&layers.10.self_attn.k_layernorm.weight8
|
| 751 |
+
output95�2&layers.10.self_attn.q_layernorm.weight2
|
| 752 |
+
output96� �02layers.10.mlp.down_proj.weight0
|
| 753 |
+
output97�0� 2layers.10.mlp.up_proj.weight;
|
| 754 |
+
output98� 2)layers.10.post_attention_layernorm.weight2
|
| 755 |
+
output99�0� 2layers.10.mlp.gate_proj.weight=
|
| 756 |
+
output100� �2(layers.11.self_attn.o_proj.o_proj.weight=
|
| 757 |
+
output101�� 2(layers.11.self_attn.qkv_proj.Wqkv.weight3
|
| 758 |
+
output102� 2 layers.11.input_layernorm.weight9
|
| 759 |
+
output103�2&layers.11.self_attn.k_layernorm.weight9
|
| 760 |
+
output104�2&layers.11.self_attn.q_layernorm.weight3
|
| 761 |
+
output105� �02layers.11.mlp.down_proj.weight1
|
| 762 |
+
output106�0� 2layers.11.mlp.up_proj.weight<
|
| 763 |
+
output107� 2)layers.11.post_attention_layernorm.weight3
|
| 764 |
+
output108�0� 2layers.11.mlp.gate_proj.weight=
|
| 765 |
+
output109� �2(layers.12.self_attn.o_proj.o_proj.weight=
|
| 766 |
+
output110�� 2(layers.12.self_attn.qkv_proj.Wqkv.weight3
|
| 767 |
+
output111� 2 layers.12.input_layernorm.weight9
|
| 768 |
+
output112�2&layers.12.self_attn.k_layernorm.weight9
|
| 769 |
+
output113�2&layers.12.self_attn.q_layernorm.weight3
|
| 770 |
+
output114� �02layers.12.mlp.down_proj.weight1
|
| 771 |
+
output115�0� 2layers.12.mlp.up_proj.weight<
|
| 772 |
+
output116� 2)layers.12.post_attention_layernorm.weight3
|
| 773 |
+
output117�0� 2layers.12.mlp.gate_proj.weight=
|
| 774 |
+
output118� �2(layers.13.self_attn.o_proj.o_proj.weight=
|
| 775 |
+
output119�� 2(layers.13.self_attn.qkv_proj.Wqkv.weight3
|
| 776 |
+
output120� 2 layers.13.input_layernorm.weight9
|
| 777 |
+
output121�2&layers.13.self_attn.k_layernorm.weight9
|
| 778 |
+
output122�2&layers.13.self_attn.q_layernorm.weight3
|
| 779 |
+
output123� �02layers.13.mlp.down_proj.weight1
|
| 780 |
+
output124�0� 2layers.13.mlp.up_proj.weight<
|
| 781 |
+
output125� 2)layers.13.post_attention_layernorm.weight3
|
| 782 |
+
output126�0� 2layers.13.mlp.gate_proj.weight=
|
| 783 |
+
output127� �2(layers.14.self_attn.o_proj.o_proj.weight=
|
| 784 |
+
output128�� 2(layers.14.self_attn.qkv_proj.Wqkv.weight3
|
| 785 |
+
output129� 2 layers.14.input_layernorm.weight9
|
| 786 |
+
output130�2&layers.14.self_attn.k_layernorm.weight9
|
| 787 |
+
output131�2&layers.14.self_attn.q_layernorm.weight3
|
| 788 |
+
output132� �02layers.14.mlp.down_proj.weight1
|
| 789 |
+
output133�0� 2layers.14.mlp.up_proj.weight<
|
| 790 |
+
output134� 2)layers.14.post_attention_layernorm.weight3
|
| 791 |
+
output135�0� 2layers.14.mlp.gate_proj.weight=
|
| 792 |
+
output136� �2(layers.15.self_attn.o_proj.o_proj.weight=
|
| 793 |
+
output137�� 2(layers.15.self_attn.qkv_proj.Wqkv.weight3
|
| 794 |
+
output138� 2 layers.15.input_layernorm.weight9
|
| 795 |
+
output139�2&layers.15.self_attn.k_layernorm.weight9
|
| 796 |
+
output140�2&layers.15.self_attn.q_layernorm.weight3
|
| 797 |
+
output141� �02layers.15.mlp.down_proj.weight1
|
| 798 |
+
output142�0� 2layers.15.mlp.up_proj.weight<
|
| 799 |
+
output143� 2)layers.15.post_attention_layernorm.weight3
|
| 800 |
+
output144�0� 2layers.15.mlp.gate_proj.weight=
|
| 801 |
+
output145� �2(layers.16.self_attn.o_proj.o_proj.weight=
|
| 802 |
+
output146�� 2(layers.16.self_attn.qkv_proj.Wqkv.weight3
|
| 803 |
+
output147� 2 layers.16.input_layernorm.weight9
|
| 804 |
+
output148�2&layers.16.self_attn.k_layernorm.weight9
|
| 805 |
+
output149�2&layers.16.self_attn.q_layernorm.weight3
|
| 806 |
+
output150� �02layers.16.mlp.down_proj.weight1
|
| 807 |
+
output151�0� 2layers.16.mlp.up_proj.weight<
|
| 808 |
+
output152� 2)layers.16.post_attention_layernorm.weight3
|
| 809 |
+
output153�0� 2layers.16.mlp.gate_proj.weight=
|
| 810 |
+
output154� �2(layers.17.self_attn.o_proj.o_proj.weight=
|
| 811 |
+
output155�� 2(layers.17.self_attn.qkv_proj.Wqkv.weight3
|
| 812 |
+
output156� 2 layers.17.input_layernorm.weight9
|
| 813 |
+
output157�2&layers.17.self_attn.k_layernorm.weight9
|
| 814 |
+
output158�2&layers.17.self_attn.q_layernorm.weight3
|
| 815 |
+
output159� �02layers.17.mlp.down_proj.weight1
|
| 816 |
+
output160�0� 2layers.17.mlp.up_proj.weight<
|
| 817 |
+
output161� 2)layers.17.post_attention_layernorm.weight3
|
| 818 |
+
output162�0� 2layers.17.mlp.gate_proj.weight=
|
| 819 |
+
output163� �2(layers.18.self_attn.o_proj.o_proj.weight=
|
| 820 |
+
output164�� 2(layers.18.self_attn.qkv_proj.Wqkv.weight3
|
| 821 |
+
output165� 2 layers.18.input_layernorm.weight9
|
| 822 |
+
output166�2&layers.18.self_attn.k_layernorm.weight9
|
| 823 |
+
output167�2&layers.18.self_attn.q_layernorm.weight3
|
| 824 |
+
output168� �02layers.18.mlp.down_proj.weight1
|
| 825 |
+
output169�0� 2layers.18.mlp.up_proj.weight<
|
| 826 |
+
output170� 2)layers.18.post_attention_layernorm.weight3
|
| 827 |
+
output171�0� 2layers.18.mlp.gate_proj.weight=
|
| 828 |
+
output172� �2(layers.19.self_attn.o_proj.o_proj.weight=
|
| 829 |
+
output173�� 2(layers.19.self_attn.qkv_proj.Wqkv.weight3
|
| 830 |
+
output174� 2 layers.19.input_layernorm.weight9
|
| 831 |
+
output175�2&layers.19.self_attn.k_layernorm.weight9
|
| 832 |
+
output176�2&layers.19.self_attn.q_layernorm.weight3
|
| 833 |
+
output177� �02layers.19.mlp.down_proj.weight1
|
| 834 |
+
output178�0� 2layers.19.mlp.up_proj.weight<
|
| 835 |
+
output179� 2)layers.19.post_attention_layernorm.weight3
|
| 836 |
+
output180�0� 2layers.19.mlp.gate_proj.weight=
|
| 837 |
+
output181� �2(layers.20.self_attn.o_proj.o_proj.weight=
|
| 838 |
+
output182�� 2(layers.20.self_attn.qkv_proj.Wqkv.weight3
|
| 839 |
+
output183� 2 layers.20.input_layernorm.weight9
|
| 840 |
+
output184�2&layers.20.self_attn.k_layernorm.weight9
|
| 841 |
+
output185�2&layers.20.self_attn.q_layernorm.weight3
|
| 842 |
+
output186� �02layers.20.mlp.down_proj.weight1
|
| 843 |
+
output187�0� 2layers.20.mlp.up_proj.weight<
|
| 844 |
+
output188� 2)layers.20.post_attention_layernorm.weight3
|
| 845 |
+
output189�0� 2layers.20.mlp.gate_proj.weight=
|
| 846 |
+
output190� �2(layers.21.self_attn.o_proj.o_proj.weight=
|
| 847 |
+
output191�� 2(layers.21.self_attn.qkv_proj.Wqkv.weight3
|
| 848 |
+
output192� 2 layers.21.input_layernorm.weight9
|
| 849 |
+
output193�2&layers.21.self_attn.k_layernorm.weight9
|
| 850 |
+
output194�2&layers.21.self_attn.q_layernorm.weight3
|
| 851 |
+
output195� �02layers.21.mlp.down_proj.weight1
|
| 852 |
+
output196�0� 2layers.21.mlp.up_proj.weight<
|
| 853 |
+
output197� 2)layers.21.post_attention_layernorm.weight3
|
| 854 |
+
output198�0� 2layers.21.mlp.gate_proj.weight=
|
| 855 |
+
output199� �2(layers.22.self_attn.o_proj.o_proj.weight=
|
| 856 |
+
output200�� 2(layers.22.self_attn.qkv_proj.Wqkv.weight3
|
| 857 |
+
output201� 2 layers.22.input_layernorm.weight9
|
| 858 |
+
output202�2&layers.22.self_attn.k_layernorm.weight9
|
| 859 |
+
output203�2&layers.22.self_attn.q_layernorm.weight3
|
| 860 |
+
output204� �02layers.22.mlp.down_proj.weight1
|
| 861 |
+
output205�0� 2layers.22.mlp.up_proj.weight<
|
| 862 |
+
output206� 2)layers.22.post_attention_layernorm.weight3
|
| 863 |
+
output207�0� 2layers.22.mlp.gate_proj.weight=
|
| 864 |
+
output208� �2(layers.23.self_attn.o_proj.o_proj.weight=
|
| 865 |
+
output209�� 2(layers.23.self_attn.qkv_proj.Wqkv.weight3
|
| 866 |
+
output210� 2 layers.23.input_layernorm.weight9
|
| 867 |
+
output211�2&layers.23.self_attn.k_layernorm.weight9
|
| 868 |
+
output212�2&layers.23.self_attn.q_layernorm.weight3
|
| 869 |
+
output213� �02layers.23.mlp.down_proj.weight1
|
| 870 |
+
output214�0� 2layers.23.mlp.up_proj.weight<
|
| 871 |
+
output215� 2)layers.23.post_attention_layernorm.weight3
|
| 872 |
+
output216�0� 2layers.23.mlp.gate_proj.weight=
|
| 873 |
+
output217� �2(layers.24.self_attn.o_proj.o_proj.weight=
|
| 874 |
+
output218�� 2(layers.24.self_attn.qkv_proj.Wqkv.weight3
|
| 875 |
+
output219� 2 layers.24.input_layernorm.weight9
|
| 876 |
+
output220�2&layers.24.self_attn.k_layernorm.weight9
|
| 877 |
+
output221�2&layers.24.self_attn.q_layernorm.weight3
|
| 878 |
+
output222� �02layers.24.mlp.down_proj.weight1
|
| 879 |
+
output223�0� 2layers.24.mlp.up_proj.weight<
|
| 880 |
+
output224� 2)layers.24.post_attention_layernorm.weight3
|
| 881 |
+
output225�0� 2layers.24.mlp.gate_proj.weight=
|
| 882 |
+
output226� �2(layers.25.self_attn.o_proj.o_proj.weight=
|
| 883 |
+
output227�� 2(layers.25.self_attn.qkv_proj.Wqkv.weight3
|
| 884 |
+
output228� 2 layers.25.input_layernorm.weight9
|
| 885 |
+
output229�2&layers.25.self_attn.k_layernorm.weight9
|
| 886 |
+
output230�2&layers.25.self_attn.q_layernorm.weight3
|
| 887 |
+
output231� �02layers.25.mlp.down_proj.weight1
|
| 888 |
+
output232�0� 2layers.25.mlp.up_proj.weight<
|
| 889 |
+
output233� 2)layers.25.post_attention_layernorm.weight3
|
| 890 |
+
output234�0� 2layers.25.mlp.gate_proj.weight=
|
| 891 |
+
output235� �2(layers.26.self_attn.o_proj.o_proj.weight=
|
| 892 |
+
output236�� 2(layers.26.self_attn.qkv_proj.Wqkv.weight3
|
| 893 |
+
output237� 2 layers.26.input_layernorm.weight9
|
| 894 |
+
output238�2&layers.26.self_attn.k_layernorm.weight9
|
| 895 |
+
output239�2&layers.26.self_attn.q_layernorm.weight3
|
| 896 |
+
output240� �02layers.26.mlp.down_proj.weight1
|
| 897 |
+
output241�0� 2layers.26.mlp.up_proj.weight<
|
| 898 |
+
output242� 2)layers.26.post_attention_layernorm.weight3
|
| 899 |
+
output243�0� 2layers.26.mlp.gate_proj.weight=
|
| 900 |
+
output244� �2(layers.27.self_attn.o_proj.o_proj.weight=
|
| 901 |
+
output245�� 2(layers.27.self_attn.qkv_proj.Wqkv.weight3
|
| 902 |
+
output246� 2 layers.27.input_layernorm.weight9
|
| 903 |
+
output247�2&layers.27.self_attn.k_layernorm.weight9
|
| 904 |
+
output248�2&layers.27.self_attn.q_layernorm.weight3
|
| 905 |
+
output249� �02layers.27.mlp.down_proj.weight1
|
| 906 |
+
output250�0� 2layers.27.mlp.up_proj.weight<
|
| 907 |
+
output251� 2)layers.27.post_attention_layernorm.weight3
|
| 908 |
+
output252�0� 2layers.27.mlp.gate_proj.weight=
|
| 909 |
+
output253� �2(layers.28.self_attn.o_proj.o_proj.weight=
|
| 910 |
+
output254�� 2(layers.28.self_attn.qkv_proj.Wqkv.weight3
|
| 911 |
+
output255� 2 layers.28.input_layernorm.weight9
|
| 912 |
+
output256�2&layers.28.self_attn.k_layernorm.weight9
|
| 913 |
+
output257�2&layers.28.self_attn.q_layernorm.weight3
|
| 914 |
+
output258� �02layers.28.mlp.down_proj.weight1
|
| 915 |
+
output259�0� 2layers.28.mlp.up_proj.weight<
|
| 916 |
+
output260� 2)layers.28.post_attention_layernorm.weight3
|
| 917 |
+
output261�0� 2layers.28.mlp.gate_proj.weight=
|
| 918 |
+
output262� �2(layers.29.self_attn.o_proj.o_proj.weight=
|
| 919 |
+
output263�� 2(layers.29.self_attn.qkv_proj.Wqkv.weight3
|
| 920 |
+
output264� 2 layers.29.input_layernorm.weight9
|
| 921 |
+
output265�2&layers.29.self_attn.k_layernorm.weight9
|
| 922 |
+
output266�2&layers.29.self_attn.q_layernorm.weight3
|
| 923 |
+
output267� �02layers.29.mlp.down_proj.weight1
|
| 924 |
+
output268�0� 2layers.29.mlp.up_proj.weight<
|
| 925 |
+
output269� 2)layers.29.post_attention_layernorm.weight3
|
| 926 |
+
output270�0� 2layers.29.mlp.gate_proj.weight=
|
| 927 |
+
output271� �2(layers.30.self_attn.o_proj.o_proj.weight=
|
| 928 |
+
output272�� 2(layers.30.self_attn.qkv_proj.Wqkv.weight3
|
| 929 |
+
output273� 2 layers.30.input_layernorm.weight9
|
| 930 |
+
output274�2&layers.30.self_attn.k_layernorm.weight9
|
| 931 |
+
output275�2&layers.30.self_attn.q_layernorm.weight3
|
| 932 |
+
output276� �02layers.30.mlp.down_proj.weight1
|
| 933 |
+
output277�0� 2layers.30.mlp.up_proj.weight<
|
| 934 |
+
output278� 2)layers.30.post_attention_layernorm.weight3
|
| 935 |
+
output279�0� 2layers.30.mlp.gate_proj.weight=
|
| 936 |
+
output280� �2(layers.31.self_attn.o_proj.o_proj.weight=
|
| 937 |
+
output281�� 2(layers.31.self_attn.qkv_proj.Wqkv.weight3
|
| 938 |
+
output282� 2 layers.31.input_layernorm.weight9
|
| 939 |
+
output283�2&layers.31.self_attn.k_layernorm.weight9
|
| 940 |
+
output284�2&layers.31.self_attn.q_layernorm.weight3
|
| 941 |
+
output285� �02layers.31.mlp.down_proj.weight1
|
| 942 |
+
output286�0� 2layers.31.mlp.up_proj.weight<
|
| 943 |
+
output287� 2)layers.31.post_attention_layernorm.weight3
|
| 944 |
+
output288�0� 2layers.31.mlp.gate_proj.weight=
|
| 945 |
+
output289� �2(layers.32.self_attn.o_proj.o_proj.weight=
|
| 946 |
+
output290�� 2(layers.32.self_attn.qkv_proj.Wqkv.weight3
|
| 947 |
+
output291� 2 layers.32.input_layernorm.weight9
|
| 948 |
+
output292�2&layers.32.self_attn.k_layernorm.weight9
|
| 949 |
+
output293�2&layers.32.self_attn.q_layernorm.weight3
|
| 950 |
+
output294� �02layers.32.mlp.down_proj.weight1
|
| 951 |
+
output295�0� 2layers.32.mlp.up_proj.weight<
|
| 952 |
+
output296� 2)layers.32.post_attention_layernorm.weight3
|
| 953 |
+
output297�0� 2layers.32.mlp.gate_proj.weight=
|
| 954 |
+
output298� �2(layers.33.self_attn.o_proj.o_proj.weight=
|
| 955 |
+
output299�� 2(layers.33.self_attn.qkv_proj.Wqkv.weight3
|
| 956 |
+
output300� 2 layers.33.input_layernorm.weight9
|
| 957 |
+
output301�2&layers.33.self_attn.k_layernorm.weight9
|
| 958 |
+
output302�2&layers.33.self_attn.q_layernorm.weight3
|
| 959 |
+
output303� �02layers.33.mlp.down_proj.weight1
|
| 960 |
+
output304�0� 2layers.33.mlp.up_proj.weight<
|
| 961 |
+
output305� 2)layers.33.post_attention_layernorm.weight3
|
| 962 |
+
output306�0� 2layers.33.mlp.gate_proj.weight=
|
| 963 |
+
output307� �2(layers.34.self_attn.o_proj.o_proj.weight=
|
| 964 |
+
output308�� 2(layers.34.self_attn.qkv_proj.Wqkv.weight3
|
| 965 |
+
output309� 2 layers.34.input_layernorm.weight9
|
| 966 |
+
output310�2&layers.34.self_attn.k_layernorm.weight9
|
| 967 |
+
output311�2&layers.34.self_attn.q_layernorm.weight3
|
| 968 |
+
output312� �02layers.34.mlp.down_proj.weight1
|
| 969 |
+
output313�0� 2layers.34.mlp.up_proj.weight<
|
| 970 |
+
output314� 2)layers.34.post_attention_layernorm.weight3
|
| 971 |
+
output315�0� 2layers.34.mlp.gate_proj.weight=
|
| 972 |
+
output316� �2(layers.35.self_attn.o_proj.o_proj.weight=
|
| 973 |
+
output317�� 2(layers.35.self_attn.qkv_proj.Wqkv.weight3
|
| 974 |
+
output318� 2 layers.35.input_layernorm.weight9
|
| 975 |
+
output319�2&layers.35.self_attn.k_layernorm.weight9
|
| 976 |
+
output320�2&layers.35.self_attn.q_layernorm.weight3
|
| 977 |
+
output321� �02layers.35.mlp.down_proj.weight1
|
| 978 |
+
output322�0� 2layers.35.mlp.up_proj.weight<
|
| 979 |
+
output323� 2)layers.35.post_attention_layernorm.weight3
|
| 980 |
+
output324�0� 2layers.35.mlp.gate_proj.weight$
|
| 981 |
+
output325��� 2lm_head.weight
|
| 982 |
+
output326� 2norm.weight
|
layout_opt/model/graph.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:faa8f2dc49f4606210aa2baec2b92796320bf4e8f2f13139e7db28860aa0ad17
|
| 3 |
+
size 173259
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:464989b5c79dac0618dd8b9d1c58df8196ec48f89f913ca9ad1e530e04edff5f
|
| 3 |
+
size 17614391015
|
neuron_config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 3 |
+
"async_mode": false,
|
| 4 |
+
"attn_kernel_enabled": false,
|
| 5 |
+
"batch_size": 4,
|
| 6 |
+
"capacity_factor": null,
|
| 7 |
+
"cc_pipeline_tiling_factor": 2,
|
| 8 |
+
"checkpoint_id": "karanps/ChessLM_Qwen3",
|
| 9 |
+
"checkpoint_revision": "e0d57507d96b2be2dd0dc901ecb231dec2dd6330",
|
| 10 |
+
"continuous_batching": true,
|
| 11 |
+
"enable_bucketing": false,
|
| 12 |
+
"ep_degree": 1,
|
| 13 |
+
"flash_decoding_enabled": false,
|
| 14 |
+
"fused_qkv": true,
|
| 15 |
+
"glu_mlp": true,
|
| 16 |
+
"is_chunked_prefill": false,
|
| 17 |
+
"local_ranks_size": 2,
|
| 18 |
+
"logical_nc_config": 1,
|
| 19 |
+
"max_batch_size": 4,
|
| 20 |
+
"max_context_length": 2048,
|
| 21 |
+
"max_topk": 256,
|
| 22 |
+
"mlp_kernel_enabled": false,
|
| 23 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 24 |
+
"n_active_tokens": 2048,
|
| 25 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 26 |
+
"num_cores_per_group": 1,
|
| 27 |
+
"on_device_sampling": false,
|
| 28 |
+
"optimum_neuron_version": "0.3.0",
|
| 29 |
+
"output_logits": false,
|
| 30 |
+
"padding_side": "right",
|
| 31 |
+
"pp_degree": 1,
|
| 32 |
+
"qk_layernorm": false,
|
| 33 |
+
"qkv_kernel_enabled": false,
|
| 34 |
+
"rpl_reduce_dtype": "bfloat16",
|
| 35 |
+
"sequence_length": 2048,
|
| 36 |
+
"sequence_parallel_enabled": false,
|
| 37 |
+
"speculation_length": 0,
|
| 38 |
+
"start_rank_id": 0,
|
| 39 |
+
"target": null,
|
| 40 |
+
"torch_dtype": "bfloat16",
|
| 41 |
+
"tp_degree": 2,
|
| 42 |
+
"vocab_parallel": false
|
| 43 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
token_generation_model/_tp0_bk0/command.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
neuronx-cc compile --framework=XLA model.MODULE_8f245c7816a398e13e79+a9d440f5.hlo_module.pb --output model.MODULE_8f245c7816a398e13e79+a9d440f5.neff --target=trn1 --auto-cast=none --model-type=transformer '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ' -O2 --lnc=1 --logfile=log-neuron-cc.txt --enable-internal-neff-wrapper --verbose=35
|
token_generation_model/_tp0_bk0/compile_flags.MODULE_8f245c7816a398e13e79+a9d440f5.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
token_generation_model/_tp0_bk0/global_metric_store.json
ADDED
|
@@ -0,0 +1,524 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Average": {
|
| 3 |
+
"tensorizer": {
|
| 4 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.34062957763672,
|
| 5 |
+
"StaticProfiler::AveragePartitionUtilization": 98.38597106933594,
|
| 6 |
+
"StaticProfiler::AveragePeUtilization": 97.22911071777344,
|
| 7 |
+
"StaticProfiler::LocalizationEfficiency": 114.75756072998047,
|
| 8 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 114.9507064819336,
|
| 9 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0,
|
| 10 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"Count": {
|
| 14 |
+
"tensorizer": {
|
| 15 |
+
"StaticProfiler::AverageFractalPeUtilization": 1,
|
| 16 |
+
"StaticProfiler::AveragePartitionUtilization": 1,
|
| 17 |
+
"StaticProfiler::AveragePeUtilization": 1,
|
| 18 |
+
"StaticProfiler::LocalizationEfficiency": 1,
|
| 19 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1,
|
| 20 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 1,
|
| 21 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 1
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"Sum": {
|
| 25 |
+
"compiletime": {
|
| 26 |
+
"AGOrderingAnalysisPass": 2.1720478534698486,
|
| 27 |
+
"AffinePredicateResolution": 0.05626630783081055,
|
| 28 |
+
"AliasDependencyElimination": 0.0026874542236328125,
|
| 29 |
+
"AliasDependencyInduction": 0.5170383453369141,
|
| 30 |
+
"AliasDependencyReset": 2.2118747234344482,
|
| 31 |
+
"BFComputeCutting": 0.12706279754638672,
|
| 32 |
+
"BirCodeGenLoop": 2.4660451412200928,
|
| 33 |
+
"CCOpFusion": 0.8360562324523926,
|
| 34 |
+
"CanonicalizeConv": 1.700000029813964e-05,
|
| 35 |
+
"CanonicalizeDAGForPGTiling": 0.2245333194732666,
|
| 36 |
+
"CanonicalizeForTensorizer": 0.0003549999964889139,
|
| 37 |
+
"CanonicalizeIR": 0.07496881484985352,
|
| 38 |
+
"Canonicalizer": 0.00687999976798892,
|
| 39 |
+
"CoalesceCCOp": 0.20974230766296387,
|
| 40 |
+
"CommuteConcat": 0.03844571113586426,
|
| 41 |
+
"DMALocalityOpt": 0.0490877628326416,
|
| 42 |
+
"DMAProfiler": 0.09942150115966797,
|
| 43 |
+
"DMATilingProfiler": 0.09033346176147461,
|
| 44 |
+
"DataLocalityOpt": 2.409433364868164,
|
| 45 |
+
"DataStreaming": 0.16509604454040527,
|
| 46 |
+
"DeConcat": 0.035332441329956055,
|
| 47 |
+
"DeadCodeElimination": 0.0381770133972168,
|
| 48 |
+
"DeadStoreElimination": 1.5507030487060547,
|
| 49 |
+
"DelinearIndices": 0.39624762535095215,
|
| 50 |
+
"Delinearization": 0.15817999839782715,
|
| 51 |
+
"DoNothing": 7.104873657226563e-05,
|
| 52 |
+
"DramToDramTranspose": 0.09848809242248535,
|
| 53 |
+
"DumpGraphAndMetadata": 0.2141716480255127,
|
| 54 |
+
"EliminateDivs": 0.19814848899841309,
|
| 55 |
+
"ExpandBatchNorm": 0.0714106559753418,
|
| 56 |
+
"ExpandISAMacro": 0.10404038429260254,
|
| 57 |
+
"FactorizeBlkDims": 0.7872159481048584,
|
| 58 |
+
"FactorizeThreadAxesInFreeDims": 0.06115102767944336,
|
| 59 |
+
"FlattenMacroLoop": 0.10332107543945313,
|
| 60 |
+
"GenericAccessSimplifier": 0.03612351417541504,
|
| 61 |
+
"HoistCompute": 4.70000013592653e-05,
|
| 62 |
+
"IdentifyCrossPassTensors": 0.0004290000069886446,
|
| 63 |
+
"InferInitValue": 1.2103745937347412,
|
| 64 |
+
"InferIntrinsicOnCC": 0.49721264839172363,
|
| 65 |
+
"InferNeuronTensor": 2.2387959957122803,
|
| 66 |
+
"InferNonlocalTensors": 6.154020309448242,
|
| 67 |
+
"InferPSumTensor": 1.2426848411560059,
|
| 68 |
+
"InlineNativeKernels": 0.31959033012390137,
|
| 69 |
+
"InsertIOTransposes": 1.2181267738342285,
|
| 70 |
+
"InsertLocalTransposes": 1.086057424545288,
|
| 71 |
+
"InsertOffloadedTransposes": 0.1012120246887207,
|
| 72 |
+
"LICM": 0.1255204677581787,
|
| 73 |
+
"LateLegalizeInst": 0.19177460670471191,
|
| 74 |
+
"LateLegalizePostSplit": 0.10815072059631348,
|
| 75 |
+
"LateLowerReshapeOp": 0.045404911041259766,
|
| 76 |
+
"LateLowerTensorOp": 0.3547041416168213,
|
| 77 |
+
"LateNeuronInstComb": 0.4670724868774414,
|
| 78 |
+
"LayoutPreprocessing": 1.1901025772094727,
|
| 79 |
+
"LayoutPreprocessingAndAnalysis": 1.6328880786895752,
|
| 80 |
+
"LayoutRequirementAnalysis": 0.42856860160827637,
|
| 81 |
+
"LegalizeCCOpLayout": 0.08699345588684082,
|
| 82 |
+
"LegalizeOpLevelAlias": 0.03149080276489258,
|
| 83 |
+
"LegalizePartitionReduce": 0.09608721733093262,
|
| 84 |
+
"LegalizeSundaAccess": 1.5293858051300049,
|
| 85 |
+
"LegalizeSundaMacro": 0.44698476791381836,
|
| 86 |
+
"LegalizeType": 0.2130870819091797,
|
| 87 |
+
"LocalLayoutOpt": 0.8399438858032227,
|
| 88 |
+
"LoopFusion": 0.40386009216308594,
|
| 89 |
+
"LoopSplitting": 0.05149984359741211,
|
| 90 |
+
"LowerBroadcast": 0.11290383338928223,
|
| 91 |
+
"LowerCCOpBlockAxis": 0.2651100158691406,
|
| 92 |
+
"LowerComplexBroadcast": 0.1815800666809082,
|
| 93 |
+
"LowerIntrinsics": 1.2034423351287842,
|
| 94 |
+
"LowerTensorOp": 0.515345573425293,
|
| 95 |
+
"LowerTranspose": 0.5510139465332031,
|
| 96 |
+
"MacroGeneration": 3.3921492099761963,
|
| 97 |
+
"MaskPropagation": 0.14800381660461426,
|
| 98 |
+
"MemcastMotion": 0.0002300000051036477,
|
| 99 |
+
"MemcpyElimination": 5.45711612701416,
|
| 100 |
+
"MutateDataType": 0.04850482940673828,
|
| 101 |
+
"NeuronAliasDependencyInduction": 0.028447866439819336,
|
| 102 |
+
"NeuronAliasDependencyReset": 0.04381752014160156,
|
| 103 |
+
"NeuronInstComb": 0.20636940002441406,
|
| 104 |
+
"NeuronLICM": 0.3387613296508789,
|
| 105 |
+
"NeuronLoopFusion": 1.5814118385314941,
|
| 106 |
+
"NeuronLoopInterchange": 0.06079745292663574,
|
| 107 |
+
"NeuronSimplifier": 0.4541950225830078,
|
| 108 |
+
"NeuronSimplifyPredicates": 0.0973823070526123,
|
| 109 |
+
"NeuronValueNumbering": 0.11516690254211426,
|
| 110 |
+
"OptimizeAliasedCopyChain": 0.018416881561279297,
|
| 111 |
+
"OptimizeNKIKernels": 0.07892012596130371,
|
| 112 |
+
"PAGLayoutOpt": 7.157426357269287,
|
| 113 |
+
"PComputeCutting": 0.45456743240356445,
|
| 114 |
+
"PGLayoutTilingPipeline": 24.0252628326416,
|
| 115 |
+
"PGTiling": 6.715877532958984,
|
| 116 |
+
"PadElimination": 0.013921260833740234,
|
| 117 |
+
"ParAxesAnnotation": 6.056151390075684,
|
| 118 |
+
"PartialLoopFusion": 0.4644014835357666,
|
| 119 |
+
"PartialSimdFusion": 0.4906351566314697,
|
| 120 |
+
"PenguinizeFunctions": 0.00021300000662449747,
|
| 121 |
+
"PerfectLoopNest": 0.06508874893188477,
|
| 122 |
+
"PruneFunctions": 0.0007450000266544521,
|
| 123 |
+
"RecognizeOpIdiom": 0.2098982334136963,
|
| 124 |
+
"Recompute": 0.008437871932983398,
|
| 125 |
+
"RelaxPredicates": 0.1717524528503418,
|
| 126 |
+
"Rematerialization": 0.265545129776001,
|
| 127 |
+
"RemoveOptimizationBarriers": 0.0005959999980404973,
|
| 128 |
+
"ReshapeWeights": 0.021679162979125977,
|
| 129 |
+
"ResolveAccessConflict": 0.26529383659362793,
|
| 130 |
+
"ResolveComplicatePredicates": 0.057276248931884766,
|
| 131 |
+
"RewriteReplicationMatmul": 0.05362248420715332,
|
| 132 |
+
"RewriteWeights": 0.06288814544677734,
|
| 133 |
+
"SFKVectorizer": 7.441895961761475,
|
| 134 |
+
"ScatterMotion": 0.003945999778807163,
|
| 135 |
+
"SimpleAllReduceTiling": 0.0798797607421875,
|
| 136 |
+
"Simplifier": 0.12714624404907227,
|
| 137 |
+
"SimplifyMacroPredicates": 0.21231913566589355,
|
| 138 |
+
"SimplifyNeuronTensor": 0.36804652214050293,
|
| 139 |
+
"SimplifySlice": 0.03702497482299805,
|
| 140 |
+
"SimplifyTensor": 0.24286293983459473,
|
| 141 |
+
"SpillPSum": 0.6947588920593262,
|
| 142 |
+
"SplitAPUnionSets": 0.5079879760742188,
|
| 143 |
+
"SplitAccGrp": 0.05273175239562988,
|
| 144 |
+
"StaticProfiler": 0.1567850112915039,
|
| 145 |
+
"StaticTransposeLocalTensor": 0.46353960037231445,
|
| 146 |
+
"SundaISel": 1.5079319477081299,
|
| 147 |
+
"TCTransform": 0.04103660583496094,
|
| 148 |
+
"TensorInitialization": 0.17437958717346191,
|
| 149 |
+
"TensorOpSimplifier": 0.34393739700317383,
|
| 150 |
+
"TensorOpTransform": 1.1691737174987793,
|
| 151 |
+
"TensorizerLegalizationPass": 0.00018099999579135329,
|
| 152 |
+
"TileCCOps": 0.24624872207641602,
|
| 153 |
+
"TilingProfiler": 0.542656421661377,
|
| 154 |
+
"TransformConvOp": 0.13129019737243652,
|
| 155 |
+
"TritiumFusion": 1.9942443370819092,
|
| 156 |
+
"ValueNumbering": 0.11710119247436523,
|
| 157 |
+
"VectorizeDMA": 0.14786601066589355,
|
| 158 |
+
"VectorizeMatMult": 0.055516958236694336,
|
| 159 |
+
"VerifySupportedOps": 0.0003000000142492354,
|
| 160 |
+
"WeightCoalescing": 0.06569314002990723,
|
| 161 |
+
"ZeroSizeTensorElimination": 0.00036597251892089844,
|
| 162 |
+
"algsimp": 0.0020069999154657125,
|
| 163 |
+
"batchnorm_expander": 0.0007229999755509198,
|
| 164 |
+
"boundary-marker-removal": 0.0003640000068116933,
|
| 165 |
+
"call-inliner": 0.0002280000044265762,
|
| 166 |
+
"canonicalize-boundary-marker": 0.00044999999227002263,
|
| 167 |
+
"collective-stream-id-checker": 4.70000013592653e-05,
|
| 168 |
+
"comparison-expander": 0.0003969999961555004,
|
| 169 |
+
"computation-deduplicator": 0.00042600001324899495,
|
| 170 |
+
"config-lowering": 0.0001900000061141327,
|
| 171 |
+
"constant_folding": 0.000155999994603917,
|
| 172 |
+
"cse": 0.0004360000020824373,
|
| 173 |
+
"dce": 3.600000127335079e-05,
|
| 174 |
+
"dynamic-slice-transpose": 0.00014400000509340316,
|
| 175 |
+
"eliminate-redundant-compare": 0.0001429999974789098,
|
| 176 |
+
"emit-offloaded-dropout": 0.00024300000222865492,
|
| 177 |
+
"flatten-call-graph": 0.0002789999998640269,
|
| 178 |
+
"fuse-send-recv": 0.0013989999424666166,
|
| 179 |
+
"hilo-conditional-to-select": 8.099999831756577e-05,
|
| 180 |
+
"hilo::LegalizeAlias": 0.0032820000778883696,
|
| 181 |
+
"hilo::NeuronInstCombine": 0.0011530000483617187,
|
| 182 |
+
"hilo::NeuronOpFusion": 0.0002010000025620684,
|
| 183 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 0.00039900001138448715,
|
| 184 |
+
"hilo::ScheduleFusion": 3.5000000934815034e-05,
|
| 185 |
+
"hilo::SixtyFourHack": 0.0005590000073425472,
|
| 186 |
+
"hilo::VerifyAliasing": 7.000000186963007e-05,
|
| 187 |
+
"hlo-mac-count": 0.0004199999966658652,
|
| 188 |
+
"io-con-pipe-begin": 4.999999873689376e-06,
|
| 189 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
| 190 |
+
"io-layout-normalization": 0.0008989999769255519,
|
| 191 |
+
"legalize-ccops-for-tensorizer": 1.5999999959603883e-05,
|
| 192 |
+
"legalize-compare": 0.0003650000144261867,
|
| 193 |
+
"lower-argminmax-custom-call": 0.00015700000221841037,
|
| 194 |
+
"map-inline": 0.0006140000186860561,
|
| 195 |
+
"metadata-naming": 0.0009309999877586961,
|
| 196 |
+
"mlir::detail::OpToOpPassAdaptor": 0.00030700000934302807,
|
| 197 |
+
"mlir::hlo::MhloToPyPenguin": 0.02938299998641014,
|
| 198 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.0031610000878572464,
|
| 199 |
+
"mlir::mhlo::LowerComplexPass": 0.0037410000804811716,
|
| 200 |
+
"native-to-custom-softmax": 0.00034500000765547156,
|
| 201 |
+
"native-to-custom-softmax-dx": 0.0004039999912492931,
|
| 202 |
+
"neuron-hlo-verifier": 0.017588000744581223,
|
| 203 |
+
"operand_upcaster": 0.0006549999816343188,
|
| 204 |
+
"post-par-pipe-begin": 9.999999974752427e-07,
|
| 205 |
+
"post-par-pipe-end": 0.0,
|
| 206 |
+
"post-partition-simplification": 0.061535999178886414,
|
| 207 |
+
"pre-hlo-begin": 3.999999989900971e-06,
|
| 208 |
+
"pre-hlo-end": 9.999999974752427e-07,
|
| 209 |
+
"replace-minimum-constant": 0.0001880000054370612,
|
| 210 |
+
"reshape-mover": 7.000000186963007e-05,
|
| 211 |
+
"simplify-concat": 0.0017259999876841903,
|
| 212 |
+
"simplify-while-loops": 4.8999998398358e-05,
|
| 213 |
+
"transform-variadic-reduce": 0.0006210000137798488,
|
| 214 |
+
"tuple-simplifier": 0.00017600000137463212,
|
| 215 |
+
"unpack-nested-aws-ntwsr": 0.00033000000985339284,
|
| 216 |
+
"unroll-while-loop": 9.000000318337698e-06
|
| 217 |
+
},
|
| 218 |
+
"hilo": {
|
| 219 |
+
"HloMacCount": 16344449024.0,
|
| 220 |
+
"Traffic": 8801719296.0
|
| 221 |
+
},
|
| 222 |
+
"tensorizer": {
|
| 223 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 314028,
|
| 224 |
+
"StaticProfiler::AifUb": 16.346195220947266,
|
| 225 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 18.758495330810547,
|
| 226 |
+
"StaticProfiler::AverageDmaLength": 5336.5810546875,
|
| 227 |
+
"StaticProfiler::DDRTransferBytes": 8194615604,
|
| 228 |
+
"StaticProfiler::InternalTransferBytes": 976258560,
|
| 229 |
+
"StaticProfiler::LoadExpanded": 1396391,
|
| 230 |
+
"StaticProfiler::StoreExpanded": 79617,
|
| 231 |
+
"StaticProfiler::TotalDMAExpanded": 1476008,
|
| 232 |
+
"StaticProfiler::TotalDynamicInstancesCount": 327331,
|
| 233 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 320559,
|
| 234 |
+
"StaticProfiler::TotalLNCComm": 0,
|
| 235 |
+
"StaticProfiler::TotalLNCCommTransfer": 0,
|
| 236 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0,
|
| 237 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0,
|
| 238 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 352,
|
| 239 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 250560,
|
| 240 |
+
"TilingProfiler::NumPfTransposes": 366,
|
| 241 |
+
"TilingProfiler::NumPfTransposesForIo": 39,
|
| 242 |
+
"TilingProfiler::NumPfTransposesForLocal": 182,
|
| 243 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 145,
|
| 244 |
+
"TilingProfiler::PfTransposeInstructions": 34596,
|
| 245 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 28280,
|
| 246 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1668,
|
| 247 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 4648,
|
| 248 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 720,
|
| 249 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 10459,
|
| 250 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0,
|
| 251 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0,
|
| 252 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0,
|
| 253 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0,
|
| 254 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0,
|
| 255 |
+
"TransformConvOp::conv2d_column_packing": 0,
|
| 256 |
+
"TransformConvOp::conv2d_column_packing_1": 0,
|
| 257 |
+
"TransformConvOp::conv2d_column_packing_io10": 0,
|
| 258 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0
|
| 259 |
+
}
|
| 260 |
+
},
|
| 261 |
+
"all": {
|
| 262 |
+
"compiletime": {
|
| 263 |
+
"CanonicalizeConv": 1.700000029813964e-05,
|
| 264 |
+
"CanonicalizeForTensorizer": 0.0003549999964889139,
|
| 265 |
+
"Canonicalizer": 0.00687999976798892,
|
| 266 |
+
"HoistCompute": 4.70000013592653e-05,
|
| 267 |
+
"IdentifyCrossPassTensors": 0.0004290000069886446,
|
| 268 |
+
"MemcastMotion": 0.0002300000051036477,
|
| 269 |
+
"PenguinizeFunctions": 0.00021300000662449747,
|
| 270 |
+
"PruneFunctions": 0.0007450000266544521,
|
| 271 |
+
"RemoveOptimizationBarriers": 0.0005959999980404973,
|
| 272 |
+
"ScatterMotion": 0.003945999778807163,
|
| 273 |
+
"TensorizerLegalizationPass": 0.00018099999579135329,
|
| 274 |
+
"VerifySupportedOps": 0.0003000000142492354,
|
| 275 |
+
"algsimp": 0.0020069999154657125,
|
| 276 |
+
"batchnorm_expander": 0.0007229999755509198,
|
| 277 |
+
"boundary-marker-removal": 0.0003640000068116933,
|
| 278 |
+
"call-inliner": 0.0002280000044265762,
|
| 279 |
+
"canonicalize-boundary-marker": 0.00044999999227002263,
|
| 280 |
+
"collective-stream-id-checker": 4.70000013592653e-05,
|
| 281 |
+
"comparison-expander": 0.0003969999961555004,
|
| 282 |
+
"computation-deduplicator": 0.00042600001324899495,
|
| 283 |
+
"config-lowering": 0.0001900000061141327,
|
| 284 |
+
"constant_folding": 0.000155999994603917,
|
| 285 |
+
"cse": 0.0004360000020824373,
|
| 286 |
+
"dce": 3.600000127335079e-05,
|
| 287 |
+
"dynamic-slice-transpose": 0.00014400000509340316,
|
| 288 |
+
"eliminate-redundant-compare": 0.0001429999974789098,
|
| 289 |
+
"emit-offloaded-dropout": 0.00024300000222865492,
|
| 290 |
+
"flatten-call-graph": 0.0002789999998640269,
|
| 291 |
+
"fuse-send-recv": 0.0013989999424666166,
|
| 292 |
+
"hilo-conditional-to-select": 8.099999831756577e-05,
|
| 293 |
+
"hilo::LegalizeAlias": 0.0032820000778883696,
|
| 294 |
+
"hilo::NeuronInstCombine": 0.0011530000483617187,
|
| 295 |
+
"hilo::NeuronOpFusion": 0.0002010000025620684,
|
| 296 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 0.00039900001138448715,
|
| 297 |
+
"hilo::ScheduleFusion": 3.5000000934815034e-05,
|
| 298 |
+
"hilo::SixtyFourHack": 0.0005590000073425472,
|
| 299 |
+
"hilo::VerifyAliasing": 7.000000186963007e-05,
|
| 300 |
+
"hlo-mac-count": 0.0004199999966658652,
|
| 301 |
+
"io-con-pipe-begin": 4.999999873689376e-06,
|
| 302 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
| 303 |
+
"io-layout-normalization": 0.0008989999769255519,
|
| 304 |
+
"legalize-ccops-for-tensorizer": 1.5999999959603883e-05,
|
| 305 |
+
"legalize-compare": 0.0003650000144261867,
|
| 306 |
+
"lower-argminmax-custom-call": 0.00015700000221841037,
|
| 307 |
+
"map-inline": 0.0006140000186860561,
|
| 308 |
+
"metadata-naming": 0.0009309999877586961,
|
| 309 |
+
"mlir::detail::OpToOpPassAdaptor": 0.00030700000934302807,
|
| 310 |
+
"mlir::hlo::MhloToPyPenguin": 0.02938299998641014,
|
| 311 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.0031610000878572464,
|
| 312 |
+
"mlir::mhlo::LowerComplexPass": 0.0037410000804811716,
|
| 313 |
+
"native-to-custom-softmax": 0.00034500000765547156,
|
| 314 |
+
"native-to-custom-softmax-dx": 0.0004039999912492931,
|
| 315 |
+
"neuron-hlo-verifier": 0.017588000744581223,
|
| 316 |
+
"operand_upcaster": 0.0006549999816343188,
|
| 317 |
+
"post-par-pipe-begin": 9.999999974752427e-07,
|
| 318 |
+
"post-par-pipe-end": 0.0,
|
| 319 |
+
"post-partition-simplification": 0.061535999178886414,
|
| 320 |
+
"pre-hlo-begin": 3.999999989900971e-06,
|
| 321 |
+
"pre-hlo-end": 9.999999974752427e-07,
|
| 322 |
+
"replace-minimum-constant": 0.0001880000054370612,
|
| 323 |
+
"reshape-mover": 7.000000186963007e-05,
|
| 324 |
+
"simplify-concat": 0.0017259999876841903,
|
| 325 |
+
"simplify-while-loops": 4.8999998398358e-05,
|
| 326 |
+
"transform-variadic-reduce": 0.0006210000137798488,
|
| 327 |
+
"tuple-simplifier": 0.00017600000137463212,
|
| 328 |
+
"unpack-nested-aws-ntwsr": 0.00033000000985339284,
|
| 329 |
+
"unroll-while-loop": 9.000000318337698e-06
|
| 330 |
+
}
|
| 331 |
+
},
|
| 332 |
+
"sg00": {
|
| 333 |
+
"hilo": {
|
| 334 |
+
"ArithmeticIntensity": 3.7139217853546143,
|
| 335 |
+
"HloMacCount": 16344449024.0,
|
| 336 |
+
"Traffic": 8801719296.0
|
| 337 |
+
}
|
| 338 |
+
},
|
| 339 |
+
"sg0000": {
|
| 340 |
+
"compiletime": {
|
| 341 |
+
"AGOrderingAnalysisPass": 2.1720478534698486,
|
| 342 |
+
"AffinePredicateResolution": 0.05626630783081055,
|
| 343 |
+
"AliasDependencyElimination": 0.0026874542236328125,
|
| 344 |
+
"AliasDependencyInduction": 0.5170383453369141,
|
| 345 |
+
"AliasDependencyReset": 2.2118747234344482,
|
| 346 |
+
"BFComputeCutting": 0.12706279754638672,
|
| 347 |
+
"BirCodeGenLoop": 2.4660451412200928,
|
| 348 |
+
"CCOpFusion": 0.8360562324523926,
|
| 349 |
+
"CanonicalizeDAGForPGTiling": 0.2245333194732666,
|
| 350 |
+
"CanonicalizeIR": 0.07496881484985352,
|
| 351 |
+
"CoalesceCCOp": 0.20974230766296387,
|
| 352 |
+
"CommuteConcat": 0.03844571113586426,
|
| 353 |
+
"DMALocalityOpt": 0.0490877628326416,
|
| 354 |
+
"DMAProfiler": 0.09942150115966797,
|
| 355 |
+
"DMATilingProfiler": 0.09033346176147461,
|
| 356 |
+
"DataLocalityOpt": 2.409433364868164,
|
| 357 |
+
"DataStreaming": 0.16509604454040527,
|
| 358 |
+
"DeConcat": 0.035332441329956055,
|
| 359 |
+
"DeadCodeElimination": 0.0381770133972168,
|
| 360 |
+
"DeadStoreElimination": 1.5507030487060547,
|
| 361 |
+
"DelinearIndices": 0.39624762535095215,
|
| 362 |
+
"Delinearization": 0.15817999839782715,
|
| 363 |
+
"DoNothing": 7.104873657226563e-05,
|
| 364 |
+
"DramToDramTranspose": 0.09848809242248535,
|
| 365 |
+
"DumpGraphAndMetadata": 0.2141716480255127,
|
| 366 |
+
"EliminateDivs": 0.19814848899841309,
|
| 367 |
+
"ExpandBatchNorm": 0.0714106559753418,
|
| 368 |
+
"ExpandISAMacro": 0.10404038429260254,
|
| 369 |
+
"FactorizeBlkDims": 0.786508321762085,
|
| 370 |
+
"FactorizeThreadAxesInFreeDims": 0.06115102767944336,
|
| 371 |
+
"FlattenMacroLoop": 0.10332107543945313,
|
| 372 |
+
"GenericAccessSimplifier": 0.03612351417541504,
|
| 373 |
+
"InferInitValue": 1.2103745937347412,
|
| 374 |
+
"InferIntrinsicOnCC": 0.49721264839172363,
|
| 375 |
+
"InferNeuronTensor": 2.2387959957122803,
|
| 376 |
+
"InferNonlocalTensors": 6.154020309448242,
|
| 377 |
+
"InferPSumTensor": 1.2426848411560059,
|
| 378 |
+
"InlineNativeKernels": 0.31959033012390137,
|
| 379 |
+
"InsertIOTransposes": 1.2181267738342285,
|
| 380 |
+
"InsertLocalTransposes": 1.086057424545288,
|
| 381 |
+
"InsertOffloadedTransposes": 0.1012120246887207,
|
| 382 |
+
"LICM": 0.1255204677581787,
|
| 383 |
+
"LateLegalizeInst": 0.19177460670471191,
|
| 384 |
+
"LateLegalizePostSplit": 0.10815072059631348,
|
| 385 |
+
"LateLowerReshapeOp": 0.045404911041259766,
|
| 386 |
+
"LateLowerTensorOp": 0.3547041416168213,
|
| 387 |
+
"LateNeuronInstComb": 0.4665071964263916,
|
| 388 |
+
"LayoutPreprocessing": 1.1901025772094727,
|
| 389 |
+
"LayoutPreprocessingAndAnalysis": 1.6328880786895752,
|
| 390 |
+
"LayoutRequirementAnalysis": 0.42856860160827637,
|
| 391 |
+
"LegalizeCCOpLayout": 0.08699345588684082,
|
| 392 |
+
"LegalizeOpLevelAlias": 0.03149080276489258,
|
| 393 |
+
"LegalizePartitionReduce": 0.09608721733093262,
|
| 394 |
+
"LegalizeSundaAccess": 1.5293858051300049,
|
| 395 |
+
"LegalizeSundaMacro": 0.44698476791381836,
|
| 396 |
+
"LegalizeType": 0.2130870819091797,
|
| 397 |
+
"LocalLayoutOpt": 0.8399438858032227,
|
| 398 |
+
"LoopFusion": 0.40386009216308594,
|
| 399 |
+
"LoopSplitting": 0.05149984359741211,
|
| 400 |
+
"LowerBroadcast": 0.11268091201782227,
|
| 401 |
+
"LowerCCOpBlockAxis": 0.2651100158691406,
|
| 402 |
+
"LowerComplexBroadcast": 0.1815800666809082,
|
| 403 |
+
"LowerIntrinsics": 1.2032275199890137,
|
| 404 |
+
"LowerTensorOp": 0.515345573425293,
|
| 405 |
+
"LowerTranspose": 0.5507981777191162,
|
| 406 |
+
"MacroGeneration": 3.3921492099761963,
|
| 407 |
+
"MaskPropagation": 0.14800381660461426,
|
| 408 |
+
"MemcpyElimination": 5.45711612701416,
|
| 409 |
+
"MutateDataType": 0.04850482940673828,
|
| 410 |
+
"NeuronAliasDependencyInduction": 0.028447866439819336,
|
| 411 |
+
"NeuronAliasDependencyReset": 0.04381752014160156,
|
| 412 |
+
"NeuronInstComb": 0.20571517944335938,
|
| 413 |
+
"NeuronLICM": 0.3387613296508789,
|
| 414 |
+
"NeuronLoopFusion": 1.5814118385314941,
|
| 415 |
+
"NeuronLoopInterchange": 0.06079745292663574,
|
| 416 |
+
"NeuronSimplifier": 0.4541950225830078,
|
| 417 |
+
"NeuronSimplifyPredicates": 0.0973823070526123,
|
| 418 |
+
"NeuronValueNumbering": 0.11469674110412598,
|
| 419 |
+
"OptimizeAliasedCopyChain": 0.018416881561279297,
|
| 420 |
+
"OptimizeNKIKernels": 0.07892012596130371,
|
| 421 |
+
"PAGLayoutOpt": 7.157426357269287,
|
| 422 |
+
"PComputeCutting": 0.45456743240356445,
|
| 423 |
+
"PGLayoutTilingPipeline": 24.0252628326416,
|
| 424 |
+
"PGTiling": 6.715877532958984,
|
| 425 |
+
"PadElimination": 0.013921260833740234,
|
| 426 |
+
"ParAxesAnnotation": 6.056151390075684,
|
| 427 |
+
"PartialLoopFusion": 0.4644014835357666,
|
| 428 |
+
"PartialSimdFusion": 0.4906351566314697,
|
| 429 |
+
"PerfectLoopNest": 0.06508874893188477,
|
| 430 |
+
"RecognizeOpIdiom": 0.2098982334136963,
|
| 431 |
+
"Recompute": 0.008437871932983398,
|
| 432 |
+
"RelaxPredicates": 0.1717524528503418,
|
| 433 |
+
"Rematerialization": 0.265545129776001,
|
| 434 |
+
"ReshapeWeights": 0.021679162979125977,
|
| 435 |
+
"ResolveAccessConflict": 0.26529383659362793,
|
| 436 |
+
"ResolveComplicatePredicates": 0.057276248931884766,
|
| 437 |
+
"RewriteReplicationMatmul": 0.05362248420715332,
|
| 438 |
+
"RewriteWeights": 0.06288814544677734,
|
| 439 |
+
"SFKVectorizer": 7.441895961761475,
|
| 440 |
+
"SimpleAllReduceTiling": 0.0798797607421875,
|
| 441 |
+
"Simplifier": 0.12714624404907227,
|
| 442 |
+
"SimplifyMacroPredicates": 0.21231913566589355,
|
| 443 |
+
"SimplifyNeuronTensor": 0.36804652214050293,
|
| 444 |
+
"SimplifySlice": 0.03702497482299805,
|
| 445 |
+
"SimplifyTensor": 0.24286293983459473,
|
| 446 |
+
"SpillPSum": 0.6902801990509033,
|
| 447 |
+
"SplitAPUnionSets": 0.5079879760742188,
|
| 448 |
+
"SplitAccGrp": 0.05273175239562988,
|
| 449 |
+
"StaticProfiler": 0.1567850112915039,
|
| 450 |
+
"StaticTransposeLocalTensor": 0.46353960037231445,
|
| 451 |
+
"SundaISel": 1.5079319477081299,
|
| 452 |
+
"TCTransform": 0.04103660583496094,
|
| 453 |
+
"TensorInitialization": 0.17437958717346191,
|
| 454 |
+
"TensorOpSimplifier": 0.34393739700317383,
|
| 455 |
+
"TensorOpTransform": 1.1691737174987793,
|
| 456 |
+
"TileCCOps": 0.24624872207641602,
|
| 457 |
+
"TilingProfiler": 0.542656421661377,
|
| 458 |
+
"TransformConvOp": 0.13129019737243652,
|
| 459 |
+
"TritiumFusion": 1.9942443370819092,
|
| 460 |
+
"ValueNumbering": 0.11710119247436523,
|
| 461 |
+
"VectorizeDMA": 0.14786601066589355,
|
| 462 |
+
"VectorizeMatMult": 0.055516958236694336,
|
| 463 |
+
"WeightCoalescing": 0.06569314002990723,
|
| 464 |
+
"ZeroSizeTensorElimination": 0.00036597251892089844
|
| 465 |
+
},
|
| 466 |
+
"tensorizer": {
|
| 467 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 314028,
|
| 468 |
+
"StaticProfiler::AifUb": 16.346195220947266,
|
| 469 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 18.758495330810547,
|
| 470 |
+
"StaticProfiler::AverageDmaLength": 5336.5810546875,
|
| 471 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.34062957763672,
|
| 472 |
+
"StaticProfiler::AveragePartitionUtilization": 98.38597106933594,
|
| 473 |
+
"StaticProfiler::AveragePeUtilization": 97.22911071777344,
|
| 474 |
+
"StaticProfiler::DDRTransferBytes": 8194615604,
|
| 475 |
+
"StaticProfiler::InternalTransferBytes": 976258560,
|
| 476 |
+
"StaticProfiler::LoadExpanded": 1396391,
|
| 477 |
+
"StaticProfiler::LocalizationEfficiency": 114.75756072998047,
|
| 478 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 114.9507064819336,
|
| 479 |
+
"StaticProfiler::StoreExpanded": 79617,
|
| 480 |
+
"StaticProfiler::TotalDMAExpanded": 1476008,
|
| 481 |
+
"StaticProfiler::TotalDynamicInstancesCount": 327331,
|
| 482 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 320559,
|
| 483 |
+
"StaticProfiler::TotalLNCComm": 0,
|
| 484 |
+
"StaticProfiler::TotalLNCCommTransfer": 0,
|
| 485 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0,
|
| 486 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0,
|
| 487 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0,
|
| 488 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0,
|
| 489 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 352,
|
| 490 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 250560,
|
| 491 |
+
"TilingProfiler::NumPfTransposes": 366,
|
| 492 |
+
"TilingProfiler::NumPfTransposesForIo": 39,
|
| 493 |
+
"TilingProfiler::NumPfTransposesForLocal": 182,
|
| 494 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 145,
|
| 495 |
+
"TilingProfiler::PfTransposeInstructions": 34596,
|
| 496 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 28280,
|
| 497 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1668,
|
| 498 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 4648,
|
| 499 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 720,
|
| 500 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 10459,
|
| 501 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0,
|
| 502 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0,
|
| 503 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0,
|
| 504 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0,
|
| 505 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0,
|
| 506 |
+
"TransformConvOp::conv2d_column_packing": 0,
|
| 507 |
+
"TransformConvOp::conv2d_column_packing_1": 0,
|
| 508 |
+
"TransformConvOp::conv2d_column_packing_io10": 0,
|
| 509 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0
|
| 510 |
+
}
|
| 511 |
+
},
|
| 512 |
+
"tiled_dve_transpose_10tiled_dve_transpose_10_sg0000": {
|
| 513 |
+
"compiletime": {
|
| 514 |
+
"FactorizeBlkDims": 0.0007076263427734375,
|
| 515 |
+
"LateNeuronInstComb": 0.0005652904510498047,
|
| 516 |
+
"LowerBroadcast": 0.00022292137145996094,
|
| 517 |
+
"LowerIntrinsics": 0.0002148151397705078,
|
| 518 |
+
"LowerTranspose": 0.00021576881408691406,
|
| 519 |
+
"NeuronInstComb": 0.0006542205810546875,
|
| 520 |
+
"NeuronValueNumbering": 0.00047016143798828125,
|
| 521 |
+
"SpillPSum": 0.0044786930084228516
|
| 522 |
+
}
|
| 523 |
+
}
|
| 524 |
+
}
|
token_generation_model/_tp0_bk0/graph.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8207c614c7812232bfe4e0f280b5ab5a81bd24487594cbd5adcfb071e41473e6
|
| 3 |
+
size 10415104
|
token_generation_model/_tp0_bk0/log-neuron-cc.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
token_generation_model/_tp0_bk0/metaneff.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19c84ba107b6d20879fc4c203176a3181065b3d48b68ae67a0b2e7bae597866c
|
| 3 |
+
size 928218
|
token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69db8e5767ece577c8c0a9b48e73695bb6c31927dac9bf48d0f2ecdf5265ec9a
|
| 3 |
+
size 904963
|
token_generation_model/_tp0_bk0/model.MODULE_8f245c7816a398e13e79+a9d440f5.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8207c614c7812232bfe4e0f280b5ab5a81bd24487594cbd5adcfb071e41473e6
|
| 3 |
+
size 10415104
|
token_generation_model/_tp0_bk0/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1328f1067680c65a20e2976176e1438ecab6cb49d33d216e7cdd47d6b141f2f4
|
| 3 |
+
size 10590211
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9417dfa2470f086897a0fa5acf4c11e1b05646717bdd7f9d4dc119332c65d421
|
| 3 |
+
size 11422919
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
| 231 |
+
"clean_up_tokenization_spaces": false,
|
| 232 |
+
"eos_token": "<|im_end|>",
|
| 233 |
+
"errors": "replace",
|
| 234 |
+
"extra_special_tokens": {},
|
| 235 |
+
"max_length": 512,
|
| 236 |
+
"model_max_length": 131072,
|
| 237 |
+
"pad_to_multiple_of": null,
|
| 238 |
+
"pad_token": "<|endoftext|>",
|
| 239 |
+
"pad_token_type_id": 0,
|
| 240 |
+
"padding_side": "left",
|
| 241 |
+
"split_special_tokens": false,
|
| 242 |
+
"stride": 0,
|
| 243 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 244 |
+
"truncation_side": "right",
|
| 245 |
+
"truncation_strategy": "longest_first",
|
| 246 |
+
"unk_token": null
|
| 247 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|