Upload 269 files
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- iteration_3/README.md +109 -0
- iteration_3/compiled/.DS_Store +0 -0
- iteration_3/compiled/bert_fp16_t128.mlmodelc/analytics/coremldata.bin +3 -0
- iteration_3/compiled/bert_fp16_t128.mlmodelc/coremldata.bin +3 -0
- iteration_3/compiled/bert_fp16_t128.mlmodelc/metadata.json +94 -0
- iteration_3/compiled/bert_fp16_t128.mlmodelc/model.mil +442 -0
- iteration_3/compiled/bert_fp16_t128.mlmodelc/weights/weight.bin +3 -0
- iteration_3/compiled/bert_fp16_t256.mlmodelc/analytics/coremldata.bin +3 -0
- iteration_3/compiled/bert_fp16_t256.mlmodelc/coremldata.bin +3 -0
- iteration_3/compiled/bert_fp16_t256.mlmodelc/metadata.json +94 -0
- iteration_3/compiled/bert_fp16_t256.mlmodelc/model.mil +442 -0
- iteration_3/compiled/bert_fp16_t256.mlmodelc/weights/weight.bin +3 -0
- iteration_3/compiled/bert_fp16_t64.mlmodelc/analytics/coremldata.bin +3 -0
- iteration_3/compiled/bert_fp16_t64.mlmodelc/coremldata.bin +3 -0
- iteration_3/compiled/bert_fp16_t64.mlmodelc/metadata.json +94 -0
- iteration_3/compiled/bert_fp16_t64.mlmodelc/model.mil +442 -0
- iteration_3/compiled/bert_fp16_t64.mlmodelc/weights/weight.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/analytics/coremldata.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/coremldata.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/metadata.json +110 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/model.mil +0 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/weights/weight.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/analytics/coremldata.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/coremldata.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/metadata.json +110 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/model.mil +0 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/weights/weight.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/analytics/coremldata.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/coremldata.bin +3 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/metadata.json +110 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/model.mil +0 -0
- iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/weights/weight.bin +3 -0
- iteration_3/packages/.DS_Store +0 -0
- iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- iteration_3/packages/bert_fp16_t128.mlpackage/Manifest.json +18 -0
- iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- iteration_3/packages/bert_fp16_t256.mlpackage/Manifest.json +18 -0
- iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- iteration_3/packages/bert_fp16_t64.mlpackage/Manifest.json +18 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Manifest.json +18 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Manifest.json +18 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
iteration_3/README.md
CHANGED
|
@@ -113,3 +113,112 @@ python -m coreml.inference --fp32
|
|
| 113 |
Other quantization tiers (int8 weight-only, int4 palettization) deferred
|
| 114 |
to a future iteration — fp16 already pays for itself on disk and warm
|
| 115 |
latency.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
Other quantization tiers (int8 weight-only, int4 palettization) deferred
|
| 114 |
to a future iteration — fp16 already pays for itself on disk and warm
|
| 115 |
latency.
|
| 116 |
+
|
| 117 |
+
## Token-axis buckets (Trial 11)
|
| 118 |
+
|
| 119 |
+
The `bert` and `fused_diffusion_sampler` packages reject `ct.RangeDim`
|
| 120 |
+
on the token axis (HF Albert + cross-attn produce ops MIL refuses with
|
| 121 |
+
"data-dependent shapes were disabled"). The default packages above
|
| 122 |
+
hard-code T = 57, which caps prompts at ~37 chars.
|
| 123 |
+
|
| 124 |
+
To support longer prompts without RangeDim, this iteration ships
|
| 125 |
+
**three additional fixed-T variants** of each constrained stage:
|
| 126 |
+
|
| 127 |
+
| File | Compute | Size |
|
| 128 |
+
|---------------------------------------------------|--------------|-------|
|
| 129 |
+
| `bert_fp16_t64.mlpackage` | ALL | 12 MB |
|
| 130 |
+
| `bert_fp16_t128.mlpackage` | ALL | 12 MB |
|
| 131 |
+
| `bert_fp16_t256.mlpackage` | ALL | 12 MB |
|
| 132 |
+
| `fused_diffusion_sampler_fp16_t64.mlpackage` | ALL | 48 MB |
|
| 133 |
+
| `fused_diffusion_sampler_fp16_t128.mlpackage` | ALL | 48 MB |
|
| 134 |
+
| `fused_diffusion_sampler_fp16_t256.mlpackage` | ALL | 48 MB |
|
| 135 |
+
| **Sub-total (extra over the 8 defaults)** | | **180 MB** |
|
| 136 |
+
|
| 137 |
+
The original `bert_fp16.mlpackage` / `fused_diffusion_sampler_fp16.mlpackage`
|
| 138 |
+
(T = 57) remain in the manifest as the default fast path — every
|
| 139 |
+
sentence that fits T = 57 should keep using them. The bucketed variants
|
| 140 |
+
are loaded on demand for longer prompts.
|
| 141 |
+
|
| 142 |
+
Loader policy (Swift / Python):
|
| 143 |
+
|
| 144 |
+
```
|
| 145 |
+
real_n = #espeak tokens
|
| 146 |
+
if real_n <= 57: use *_fp16.mlpackage (default)
|
| 147 |
+
elif real_n <= 64: use *_fp16_t64.mlpackage
|
| 148 |
+
elif real_n <= 128: use *_fp16_t128.mlpackage
|
| 149 |
+
elif real_n <= 256: use *_fp16_t256.mlpackage
|
| 150 |
+
else: error (extend the bucket ladder)
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
Pad the token + attention_mask tensors with zeros to the chosen
|
| 154 |
+
bucket's T. `bert` honours `attention_mask`, so contamination at
|
| 155 |
+
padded positions is bounded; the sampler attends to bert output, so
|
| 156 |
+
it inherits the same masking.
|
| 157 |
+
|
| 158 |
+
Per-bucket end-to-end inference verified by `coreml/inference_buckets.py
|
| 159 |
+
--all` (writes `coreml/out_t{64,128,256}.wav`):
|
| 160 |
+
|
| 161 |
+
| Bucket | Prompt | Tokens | Audio | Pipeline |
|
| 162 |
+
|--------|--------------------------------------------|--------|--------|----------|
|
| 163 |
+
| 64 | "Hello there. How are you today?" | 36 | 2.42 s | 494 ms |
|
| 164 |
+
| 128 | "StyleTTS 2 is a text to speech model." | 57 | 3.60 s | 414 ms |
|
| 165 |
+
| 256 | longer paragraph (see `inference_buckets.py`) | 154 | 8.37 s | 4933 ms |
|
| 166 |
+
|
| 167 |
+
T = 256 cost is dominated by `decoder_upsample` at 4.5 s of the 4.9 s total
|
| 168 |
+
(real-time-ish CPU_ONLY at 24 kHz × 8.4 s output). Bucket-swap cost
|
| 169 |
+
itself is a few ms; the rest of the pipeline scales with output
|
| 170 |
+
frame count, not bucket size.
|
| 171 |
+
|
| 172 |
+
**Total iteration_3 footprint with buckets: 451 MB** (274 MB defaults
|
| 173 |
+
plus ~180 MB of buckets, rounded), or skip the T = 57 defaults entirely and ship only
|
| 174 |
+
buckets to save ~60 MB.
|
| 175 |
+
|
| 176 |
+
### Build / refresh the bucketed packages
|
| 177 |
+
|
| 178 |
+
```bash
|
| 179 |
+
cd models/tts/styletts2
|
| 180 |
+
|
| 181 |
+
# Build buckets (writes to coreml/packages/, run once)
|
| 182 |
+
uv run python coreml/build_buckets.py \
|
| 183 |
+
--buckets 64,128,256 --stages bert,sampler --precision fp16
|
| 184 |
+
|
| 185 |
+
# Stage into iteration_3 + compile
|
| 186 |
+
for T in 64 128 256; do
|
| 187 |
+
for stage in bert fused_diffusion_sampler; do
|
| 188 |
+
cp -R "coreml/packages/${stage}_fp16_t${T}.mlpackage" \
|
| 189 |
+
"iteration_3/packages/${stage}_fp16_t${T}.mlpackage"
|
| 190 |
+
xcrun coremlcompiler compile \
|
| 191 |
+
"iteration_3/packages/${stage}_fp16_t${T}.mlpackage" \
|
| 192 |
+
"iteration_3/compiled/"
|
| 193 |
+
done
|
| 194 |
+
done
|
| 195 |
+
|
| 196 |
+
# Validate
|
| 197 |
+
uv run python coreml/inference_buckets.py --all --output-dir coreml
|
| 198 |
+
```
|
| 199 |
+
|
| 200 |
+
### HuggingFace upload manifest
|
| 201 |
+
|
| 202 |
+
Upload the entire `iteration_3/packages/` tree (14 mlpackages):
|
| 203 |
+
|
| 204 |
+
```
|
| 205 |
+
iteration_3/packages/
|
| 206 |
+
├── text_encoder_fp16.mlpackage
|
| 207 |
+
├── bert_fp16.mlpackage ← T=57 default
|
| 208 |
+
├── bert_fp16_t64.mlpackage ← bucket
|
| 209 |
+
├── bert_fp16_t128.mlpackage ← bucket
|
| 210 |
+
├── bert_fp16_t256.mlpackage ← bucket
|
| 211 |
+
├── ref_encoder_fp16.mlpackage
|
| 212 |
+
├── fused_diffusion_sampler_fp16.mlpackage ← T=57 default
|
| 213 |
+
├── fused_diffusion_sampler_fp16_t64.mlpackage ← bucket
|
| 214 |
+
├── fused_diffusion_sampler_fp16_t128.mlpackage ← bucket
|
| 215 |
+
├── fused_diffusion_sampler_fp16_t256.mlpackage ← bucket
|
| 216 |
+
├── duration_predictor_fp16.mlpackage
|
| 217 |
+
├── fused_f0n_har_source.mlpackage ← fp32 (cumsum drift)
|
| 218 |
+
├── decoder_pre_fp16.mlpackage
|
| 219 |
+
└── decoder_upsample_fp16.mlpackage
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
Total: **451 MB** (13 fp16 stages + 1 fp32 cumsum-sensitive
|
| 223 |
+
stage). Compiled `.mlmodelc` siblings live next to the packages in
|
| 224 |
+
`iteration_3/compiled/` — same model count, same total size.
|
iteration_3/compiled/.DS_Store
CHANGED
|
Binary files a/iteration_3/compiled/.DS_Store and b/iteration_3/compiled/.DS_Store differ
|
|
|
iteration_3/compiled/bert_fp16_t128.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ffbc105f1a1ce78756729151d8f8d6669f0dc418d5146ea32f26c26bb6fb555
|
| 3 |
+
size 243
|
iteration_3/compiled/bert_fp16_t128.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:605c075757566cb93de9f4cb848a115ab2e586ab678d134e86fbb1d7646ea28b
|
| 3 |
+
size 441
|
iteration_3/compiled/bert_fp16_t128.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 128 × 768)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 128, 768]",
|
| 13 |
+
"name" : "sequence_output",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Float16",
|
| 20 |
+
"formattedType" : "MultiArray (Float16 1 × 512 × 128)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1, 512, 128]",
|
| 23 |
+
"name" : "var_1030",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
}
|
| 26 |
+
],
|
| 27 |
+
"modelParameters" : [
|
| 28 |
+
|
| 29 |
+
],
|
| 30 |
+
"specificationVersion" : 9,
|
| 31 |
+
"mlProgramOperationTypeHistogram" : {
|
| 32 |
+
"Ios18.linear" : 74,
|
| 33 |
+
"Ios18.scaledDotProductAttention" : 12,
|
| 34 |
+
"Ios18.sub" : 1,
|
| 35 |
+
"Select" : 2,
|
| 36 |
+
"Ios18.expandDims" : 2,
|
| 37 |
+
"Ios18.gelu" : 12,
|
| 38 |
+
"Ios18.gather" : 1,
|
| 39 |
+
"Ios18.add" : 27,
|
| 40 |
+
"Tile" : 1,
|
| 41 |
+
"Ios18.layerNorm" : 25,
|
| 42 |
+
"Ios18.transpose" : 49,
|
| 43 |
+
"Ios18.cast" : 5,
|
| 44 |
+
"Ios18.reshape" : 48,
|
| 45 |
+
"Ios18.greaterEqual" : 1
|
| 46 |
+
},
|
| 47 |
+
"computePrecision" : "Mixed (Float16, Int16, Int32)",
|
| 48 |
+
"isUpdatable" : "0",
|
| 49 |
+
"stateSchema" : [
|
| 50 |
+
|
| 51 |
+
],
|
| 52 |
+
"availability" : {
|
| 53 |
+
"macOS" : "15.0",
|
| 54 |
+
"tvOS" : "18.0",
|
| 55 |
+
"visionOS" : "2.0",
|
| 56 |
+
"watchOS" : "11.0",
|
| 57 |
+
"iOS" : "18.0",
|
| 58 |
+
"macCatalyst" : "18.0"
|
| 59 |
+
},
|
| 60 |
+
"modelType" : {
|
| 61 |
+
"name" : "MLModelType_mlProgram"
|
| 62 |
+
},
|
| 63 |
+
"userDefinedMetadata" : {
|
| 64 |
+
"com.github.apple.coremltools.conversion_date" : "2026-05-08",
|
| 65 |
+
"com.github.apple.coremltools.source" : "torch==2.11.0",
|
| 66 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 67 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 68 |
+
},
|
| 69 |
+
"inputSchema" : [
|
| 70 |
+
{
|
| 71 |
+
"hasShapeFlexibility" : "0",
|
| 72 |
+
"isOptional" : "0",
|
| 73 |
+
"dataType" : "Int32",
|
| 74 |
+
"formattedType" : "MultiArray (Int32 1 × 128)",
|
| 75 |
+
"shortDescription" : "",
|
| 76 |
+
"shape" : "[1, 128]",
|
| 77 |
+
"name" : "tokens",
|
| 78 |
+
"type" : "MultiArray"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"hasShapeFlexibility" : "0",
|
| 82 |
+
"isOptional" : "0",
|
| 83 |
+
"dataType" : "Int32",
|
| 84 |
+
"formattedType" : "MultiArray (Int32 1 × 128)",
|
| 85 |
+
"shortDescription" : "",
|
| 86 |
+
"shape" : "[1, 128]",
|
| 87 |
+
"name" : "attention_mask",
|
| 88 |
+
"type" : "MultiArray"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"generatedClassName" : "bert_fp16_t128",
|
| 92 |
+
"method" : "predict"
|
| 93 |
+
}
|
| 94 |
+
]
|
iteration_3/compiled/bert_fp16_t128.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.3)
|
| 2 |
+
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios18>(tensor<int32, [1, 128]> attention_mask, tensor<int32, [1, 128]> tokens) {
|
| 5 |
+
int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
|
| 6 |
+
bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
|
| 7 |
+
tensor<fp16, [178, 128]> bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = string("bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [178, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
|
| 8 |
+
string tokens_to_int16_dtype_0 = const()[name = string("tokens_to_int16_dtype_0"), val = string("int16")];
|
| 9 |
+
string cast_53_dtype_0 = const()[name = string("cast_53_dtype_0"), val = string("int32")];
|
| 10 |
+
int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
|
| 11 |
+
tensor<int16, [1, 128]> tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = string("cast_58")];
|
| 12 |
+
tensor<int32, [1, 128]> cast_53 = cast(dtype = cast_53_dtype_0, x = tokens_to_int16)[name = string("cast_57")];
|
| 13 |
+
tensor<bool, [1, 128]> greater_equal_0 = greater_equal(x = cast_53, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
|
| 14 |
+
int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(178)];
|
| 15 |
+
tensor<int32, [1, 128]> add_0 = add(x = cast_53, y = slice_by_index_0)[name = string("add_0")];
|
| 16 |
+
tensor<int32, [1, 128]> select_0 = select(a = cast_53, b = add_0, cond = greater_equal_0)[name = string("select_0")];
|
| 17 |
+
int32 inputs_embeds_cast_fp16_cast_uint16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
|
| 18 |
+
string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
|
| 19 |
+
tensor<int16, [1, 128]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_56")];
|
| 20 |
+
tensor<fp16, [1, 128, 128]> inputs_embeds_cast_fp16_cast_uint16_cast_uint16 = gather(axis = inputs_embeds_cast_fp16_cast_uint16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_to_int16, validate_indices = inputs_embeds_validate_indices_0, x = bert_embeddings_word_embeddings_weight_to_fp16)[name = string("inputs_embeds_cast_fp16_cast_uint16_cast_uint16")];
|
| 21 |
+
tensor<fp16, [1, 128, 128]> token_type_embeddings_1_to_fp16 = const()[name = string("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45696)))];
|
| 22 |
+
tensor<fp16, [1, 128, 128]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16_cast_uint16_cast_uint16, y = token_type_embeddings_1_to_fp16)[name = string("embeddings_1_cast_fp16")];
|
| 23 |
+
tensor<fp16, [1, 128, 128]> position_embeddings_1_to_fp16 = const()[name = string("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78528)))];
|
| 24 |
+
tensor<fp16, [1, 128, 128]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = string("input_5_cast_fp16")];
|
| 25 |
+
tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 26 |
+
tensor<fp16, [128]> bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111360)))];
|
| 27 |
+
tensor<fp16, [128]> bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111680)))];
|
| 28 |
+
fp16 var_34_to_fp16 = const()[name = string("op_34_to_fp16"), val = fp16(0x1p-24)];
|
| 29 |
+
tensor<fp16, [1, 128, 128]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_34_to_fp16, gamma = bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
|
| 30 |
+
tensor<int32, [1]> var_79_axes_0 = const()[name = string("op_79_axes_0"), val = tensor<int32, [1]>([1])];
|
| 31 |
+
tensor<int32, [1, 1, 128]> var_79 = expand_dims(axes = var_79_axes_0, x = attention_mask)[name = string("op_79")];
|
| 32 |
+
tensor<int32, [1]> var_81_axes_0 = const()[name = string("op_81_axes_0"), val = tensor<int32, [1]>([2])];
|
| 33 |
+
tensor<int32, [1, 1, 1, 128]> var_81 = expand_dims(axes = var_81_axes_0, x = var_79)[name = string("op_81")];
|
| 34 |
+
tensor<int32, [4]> var_90_reps_0 = const()[name = string("op_90_reps_0"), val = tensor<int32, [4]>([1, 1, 128, 1])];
|
| 35 |
+
tensor<int32, [1, 1, 128, 128]> var_90 = tile(reps = var_90_reps_0, x = var_81)[name = string("op_90")];
|
| 36 |
+
fp16 var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = fp16(0x1p+0)];
|
| 37 |
+
string var_95_to_fp16_dtype_0 = const()[name = string("op_95_to_fp16_dtype_0"), val = string("fp16")];
|
| 38 |
+
tensor<fp16, [1, 1, 128, 128]> var_90_to_fp16 = cast(dtype = var_95_to_fp16_dtype_0, x = var_90)[name = string("cast_55")];
|
| 39 |
+
tensor<fp16, [1, 1, 128, 128]> inverted_mask_cast_fp16 = sub(x = var_96_to_fp16, y = var_90_to_fp16)[name = string("inverted_mask_cast_fp16")];
|
| 40 |
+
string var_103_dtype_0 = const()[name = string("op_103_dtype_0"), val = string("bool")];
|
| 41 |
+
fp16 var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = fp16(-inf)];
|
| 42 |
+
tensor<bool, [1, 1, 128, 128]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_103_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_54")];
|
| 43 |
+
tensor<fp16, [1, 1, 128, 128]> attention_mask_cast_fp16 = select(a = var_104_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
|
| 44 |
+
tensor<fp16, [768, 128]> bert_encoder_embedding_hidden_mapping_in_weight_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112000)))];
|
| 45 |
+
tensor<fp16, [768]> bert_encoder_embedding_hidden_mapping_in_bias_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308672)))];
|
| 46 |
+
tensor<fp16, [1, 128, 768]> linear_0_cast_fp16 = linear(bias = bert_encoder_embedding_hidden_mapping_in_bias_to_fp16, weight = bert_encoder_embedding_hidden_mapping_in_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
|
| 47 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310272)))];
|
| 48 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489984)))];
|
| 49 |
+
tensor<fp16, [1, 128, 768]> linear_1_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_1_cast_fp16")];
|
| 50 |
+
tensor<int32, [4]> var_143 = const()[name = string("op_143"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 51 |
+
tensor<fp16, [1, 128, 12, 64]> x_3_cast_fp16 = reshape(shape = var_143, x = linear_1_cast_fp16)[name = string("x_3_cast_fp16")];
|
| 52 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1491584)))];
|
| 53 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2671296)))];
|
| 54 |
+
tensor<fp16, [1, 128, 768]> linear_2_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_2_cast_fp16")];
|
| 55 |
+
tensor<int32, [4]> var_152 = const()[name = string("op_152"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 56 |
+
tensor<fp16, [1, 128, 12, 64]> x_7_cast_fp16 = reshape(shape = var_152, x = linear_2_cast_fp16)[name = string("x_7_cast_fp16")];
|
| 57 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2672896)))];
|
| 58 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3852608)))];
|
| 59 |
+
tensor<fp16, [1, 128, 768]> linear_3_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_3_cast_fp16")];
|
| 60 |
+
tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 61 |
+
tensor<fp16, [1, 128, 12, 64]> x_11_cast_fp16 = reshape(shape = var_161, x = linear_3_cast_fp16)[name = string("x_11_cast_fp16")];
|
| 62 |
+
tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 63 |
+
tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 64 |
+
tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 65 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = x_11_cast_fp16)[name = string("transpose_154")];
|
| 66 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_73 = transpose(perm = transpose_73_perm_0, x = x_7_cast_fp16)[name = string("transpose_155")];
|
| 67 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = x_3_cast_fp16)[name = string("transpose_156")];
|
| 68 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_73, query = transpose_72, value = transpose_74)[name = string("attention_output_1_cast_fp16")];
|
| 69 |
+
tensor<int32, [4]> attention_output_3_perm_0 = const()[name = string("attention_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 70 |
+
tensor<int32, [3]> var_167 = const()[name = string("op_167"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 71 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_3_cast_fp16 = transpose(perm = attention_output_3_perm_0, x = attention_output_1_cast_fp16)[name = string("transpose_153")];
|
| 72 |
+
tensor<fp16, [1, 128, 768]> input_9_cast_fp16 = reshape(shape = var_167, x = attention_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
|
| 73 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3854208)))];
|
| 74 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5033920)))];
|
| 75 |
+
tensor<fp16, [1, 128, 768]> linear_4_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_4_cast_fp16")];
|
| 76 |
+
tensor<fp16, [1, 128, 768]> input_11_cast_fp16 = add(x = linear_0_cast_fp16, y = linear_4_cast_fp16)[name = string("input_11_cast_fp16")];
|
| 77 |
+
tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 78 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5035520)))];
|
| 79 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5037120)))];
|
| 80 |
+
fp16 var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = fp16(0x1p-24)];
|
| 81 |
+
tensor<fp16, [1, 128, 768]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")];
|
| 82 |
+
tensor<fp16, [2048, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16"), val = tensor<fp16, [2048, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5038720)))];
|
| 83 |
+
tensor<fp16, [2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8184512)))];
|
| 84 |
+
tensor<fp16, [1, 128, 2048]> linear_5_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_5_cast_fp16")];
|
| 85 |
+
string input_17_mode_0 = const()[name = string("input_17_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 86 |
+
tensor<fp16, [1, 128, 2048]> input_17_cast_fp16 = gelu(mode = input_17_mode_0, x = linear_5_cast_fp16)[name = string("input_17_cast_fp16")];
|
| 87 |
+
tensor<fp16, [768, 2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16"), val = tensor<fp16, [768, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8188672)))];
|
| 88 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11334464)))];
|
| 89 |
+
tensor<fp16, [1, 128, 768]> linear_6_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_6_cast_fp16")];
|
| 90 |
+
tensor<fp16, [1, 128, 768]> input_19_cast_fp16 = add(x = linear_6_cast_fp16, y = input_13_cast_fp16)[name = string("input_19_cast_fp16")];
|
| 91 |
+
tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 92 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11336064)))];
|
| 93 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11337664)))];
|
| 94 |
+
tensor<fp16, [1, 128, 768]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_19_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
|
| 95 |
+
tensor<fp16, [1, 128, 768]> linear_7_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_7_cast_fp16")];
|
| 96 |
+
tensor<int32, [4]> var_218 = const()[name = string("op_218"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 97 |
+
tensor<fp16, [1, 128, 12, 64]> x_15_cast_fp16 = reshape(shape = var_218, x = linear_7_cast_fp16)[name = string("x_15_cast_fp16")];
|
| 98 |
+
tensor<fp16, [1, 128, 768]> linear_8_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_8_cast_fp16")];
|
| 99 |
+
tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 100 |
+
tensor<fp16, [1, 128, 12, 64]> x_19_cast_fp16 = reshape(shape = var_227, x = linear_8_cast_fp16)[name = string("x_19_cast_fp16")];
|
| 101 |
+
tensor<fp16, [1, 128, 768]> linear_9_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_9_cast_fp16")];
|
| 102 |
+
tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 103 |
+
tensor<fp16, [1, 128, 12, 64]> x_23_cast_fp16 = reshape(shape = var_236, x = linear_9_cast_fp16)[name = string("x_23_cast_fp16")];
|
| 104 |
+
tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 105 |
+
tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 106 |
+
tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 107 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_77 = transpose(perm = transpose_77_perm_0, x = x_23_cast_fp16)[name = string("transpose_150")];
|
| 108 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = x_19_cast_fp16)[name = string("transpose_151")];
|
| 109 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_75 = transpose(perm = transpose_75_perm_0, x = x_15_cast_fp16)[name = string("transpose_152")];
|
| 110 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_76, query = transpose_75, value = transpose_77)[name = string("attention_output_5_cast_fp16")];
|
| 111 |
+
tensor<int32, [4]> attention_output_7_perm_0 = const()[name = string("attention_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 112 |
+
tensor<int32, [3]> var_242 = const()[name = string("op_242"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 113 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_7_cast_fp16 = transpose(perm = attention_output_7_perm_0, x = attention_output_5_cast_fp16)[name = string("transpose_149")];
|
| 114 |
+
tensor<fp16, [1, 128, 768]> input_21_cast_fp16 = reshape(shape = var_242, x = attention_output_7_cast_fp16)[name = string("input_21_cast_fp16")];
|
| 115 |
+
tensor<fp16, [1, 128, 768]> linear_10_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_10_cast_fp16")];
|
| 116 |
+
tensor<fp16, [1, 128, 768]> input_23_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = linear_10_cast_fp16)[name = string("input_23_cast_fp16")];
|
| 117 |
+
tensor<int32, [1]> input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 118 |
+
tensor<fp16, [1, 128, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")];
|
| 119 |
+
tensor<fp16, [1, 128, 2048]> linear_11_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_11_cast_fp16")];
|
| 120 |
+
string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 121 |
+
tensor<fp16, [1, 128, 2048]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = linear_11_cast_fp16)[name = string("input_29_cast_fp16")];
|
| 122 |
+
tensor<fp16, [1, 128, 768]> linear_12_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_29_cast_fp16)[name = string("linear_12_cast_fp16")];
|
| 123 |
+
tensor<fp16, [1, 128, 768]> input_31_cast_fp16 = add(x = linear_12_cast_fp16, y = input_25_cast_fp16)[name = string("input_31_cast_fp16")];
|
| 124 |
+
tensor<int32, [1]> hidden_states_5_axes_0 = const()[name = string("hidden_states_5_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 125 |
+
tensor<fp16, [1, 128, 768]> hidden_states_5_cast_fp16 = layer_norm(axes = hidden_states_5_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
|
| 126 |
+
tensor<fp16, [1, 128, 768]> linear_13_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_13_cast_fp16")];
|
| 127 |
+
tensor<int32, [4]> var_293 = const()[name = string("op_293"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 128 |
+
tensor<fp16, [1, 128, 12, 64]> x_27_cast_fp16 = reshape(shape = var_293, x = linear_13_cast_fp16)[name = string("x_27_cast_fp16")];
|
| 129 |
+
tensor<fp16, [1, 128, 768]> linear_14_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_14_cast_fp16")];
|
| 130 |
+
tensor<int32, [4]> var_302 = const()[name = string("op_302"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 131 |
+
tensor<fp16, [1, 128, 12, 64]> x_31_cast_fp16 = reshape(shape = var_302, x = linear_14_cast_fp16)[name = string("x_31_cast_fp16")];
|
| 132 |
+
tensor<fp16, [1, 128, 768]> linear_15_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_15_cast_fp16")];
|
| 133 |
+
tensor<int32, [4]> var_311 = const()[name = string("op_311"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 134 |
+
tensor<fp16, [1, 128, 12, 64]> x_35_cast_fp16 = reshape(shape = var_311, x = linear_15_cast_fp16)[name = string("x_35_cast_fp16")];
|
| 135 |
+
tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 136 |
+
tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 137 |
+
tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 138 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = x_35_cast_fp16)[name = string("transpose_146")];
|
| 139 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_79 = transpose(perm = transpose_79_perm_0, x = x_31_cast_fp16)[name = string("transpose_147")];
|
| 140 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = x_27_cast_fp16)[name = string("transpose_148")];
|
| 141 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_79, query = transpose_78, value = transpose_80)[name = string("attention_output_9_cast_fp16")];
|
| 142 |
+
tensor<int32, [4]> attention_output_11_perm_0 = const()[name = string("attention_output_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 143 |
+
tensor<int32, [3]> var_317 = const()[name = string("op_317"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 144 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_11_cast_fp16 = transpose(perm = attention_output_11_perm_0, x = attention_output_9_cast_fp16)[name = string("transpose_145")];
|
| 145 |
+
tensor<fp16, [1, 128, 768]> input_33_cast_fp16 = reshape(shape = var_317, x = attention_output_11_cast_fp16)[name = string("input_33_cast_fp16")];
|
| 146 |
+
tensor<fp16, [1, 128, 768]> linear_16_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_16_cast_fp16")];
|
| 147 |
+
tensor<fp16, [1, 128, 768]> input_35_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = linear_16_cast_fp16)[name = string("input_35_cast_fp16")];
|
| 148 |
+
tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 149 |
+
tensor<fp16, [1, 128, 768]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")];
|
| 150 |
+
tensor<fp16, [1, 128, 2048]> linear_17_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_37_cast_fp16)[name = string("linear_17_cast_fp16")];
|
| 151 |
+
string input_41_mode_0 = const()[name = string("input_41_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 152 |
+
tensor<fp16, [1, 128, 2048]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = linear_17_cast_fp16)[name = string("input_41_cast_fp16")];
|
| 153 |
+
tensor<fp16, [1, 128, 768]> linear_18_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_18_cast_fp16")];
|
| 154 |
+
tensor<fp16, [1, 128, 768]> input_43_cast_fp16 = add(x = linear_18_cast_fp16, y = input_37_cast_fp16)[name = string("input_43_cast_fp16")];
|
| 155 |
+
tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 156 |
+
tensor<fp16, [1, 128, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_43_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
|
| 157 |
+
tensor<fp16, [1, 128, 768]> linear_19_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_19_cast_fp16")];
|
| 158 |
+
tensor<int32, [4]> var_368 = const()[name = string("op_368"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 159 |
+
tensor<fp16, [1, 128, 12, 64]> x_39_cast_fp16 = reshape(shape = var_368, x = linear_19_cast_fp16)[name = string("x_39_cast_fp16")];
|
| 160 |
+
tensor<fp16, [1, 128, 768]> linear_20_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_20_cast_fp16")];
|
| 161 |
+
tensor<int32, [4]> var_377 = const()[name = string("op_377"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 162 |
+
tensor<fp16, [1, 128, 12, 64]> x_43_cast_fp16 = reshape(shape = var_377, x = linear_20_cast_fp16)[name = string("x_43_cast_fp16")];
|
| 163 |
+
tensor<fp16, [1, 128, 768]> linear_21_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_21_cast_fp16")];
|
| 164 |
+
tensor<int32, [4]> var_386 = const()[name = string("op_386"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 165 |
+
tensor<fp16, [1, 128, 12, 64]> x_47_cast_fp16 = reshape(shape = var_386, x = linear_21_cast_fp16)[name = string("x_47_cast_fp16")];
|
| 166 |
+
tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 167 |
+
tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 168 |
+
tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 169 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_83 = transpose(perm = transpose_83_perm_0, x = x_47_cast_fp16)[name = string("transpose_142")];
|
| 170 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = x_43_cast_fp16)[name = string("transpose_143")];
|
| 171 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_81 = transpose(perm = transpose_81_perm_0, x = x_39_cast_fp16)[name = string("transpose_144")];
|
| 172 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_82, query = transpose_81, value = transpose_83)[name = string("attention_output_13_cast_fp16")];
|
| 173 |
+
tensor<int32, [4]> attention_output_15_perm_0 = const()[name = string("attention_output_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 174 |
+
tensor<int32, [3]> var_392 = const()[name = string("op_392"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 175 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_15_cast_fp16 = transpose(perm = attention_output_15_perm_0, x = attention_output_13_cast_fp16)[name = string("transpose_141")];
|
| 176 |
+
tensor<fp16, [1, 128, 768]> input_45_cast_fp16 = reshape(shape = var_392, x = attention_output_15_cast_fp16)[name = string("input_45_cast_fp16")];
|
| 177 |
+
tensor<fp16, [1, 128, 768]> linear_22_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_22_cast_fp16")];
|
| 178 |
+
tensor<fp16, [1, 128, 768]> input_47_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = linear_22_cast_fp16)[name = string("input_47_cast_fp16")];
|
| 179 |
+
tensor<int32, [1]> input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 180 |
+
tensor<fp16, [1, 128, 768]> input_49_cast_fp16 = layer_norm(axes = input_49_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
|
| 181 |
+
tensor<fp16, [1, 128, 2048]> linear_23_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_49_cast_fp16)[name = string("linear_23_cast_fp16")];
|
| 182 |
+
string input_53_mode_0 = const()[name = string("input_53_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 183 |
+
tensor<fp16, [1, 128, 2048]> input_53_cast_fp16 = gelu(mode = input_53_mode_0, x = linear_23_cast_fp16)[name = string("input_53_cast_fp16")];
|
| 184 |
+
tensor<fp16, [1, 128, 768]> linear_24_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_53_cast_fp16)[name = string("linear_24_cast_fp16")];
|
| 185 |
+
tensor<fp16, [1, 128, 768]> input_55_cast_fp16 = add(x = linear_24_cast_fp16, y = input_49_cast_fp16)[name = string("input_55_cast_fp16")];
|
| 186 |
+
tensor<int32, [1]> hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 187 |
+
tensor<fp16, [1, 128, 768]> hidden_states_9_cast_fp16 = layer_norm(axes = hidden_states_9_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_55_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
|
| 188 |
+
tensor<fp16, [1, 128, 768]> linear_25_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_25_cast_fp16")];
|
| 189 |
+
tensor<int32, [4]> var_443 = const()[name = string("op_443"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 190 |
+
tensor<fp16, [1, 128, 12, 64]> x_51_cast_fp16 = reshape(shape = var_443, x = linear_25_cast_fp16)[name = string("x_51_cast_fp16")];
|
| 191 |
+
tensor<fp16, [1, 128, 768]> linear_26_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_26_cast_fp16")];
|
| 192 |
+
tensor<int32, [4]> var_452 = const()[name = string("op_452"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 193 |
+
tensor<fp16, [1, 128, 12, 64]> x_55_cast_fp16 = reshape(shape = var_452, x = linear_26_cast_fp16)[name = string("x_55_cast_fp16")];
|
| 194 |
+
tensor<fp16, [1, 128, 768]> linear_27_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_27_cast_fp16")];
|
| 195 |
+
tensor<int32, [4]> var_461 = const()[name = string("op_461"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 196 |
+
tensor<fp16, [1, 128, 12, 64]> x_59_cast_fp16 = reshape(shape = var_461, x = linear_27_cast_fp16)[name = string("x_59_cast_fp16")];
|
| 197 |
+
tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 198 |
+
tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 199 |
+
tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 200 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = x_59_cast_fp16)[name = string("transpose_138")];
|
| 201 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_85 = transpose(perm = transpose_85_perm_0, x = x_55_cast_fp16)[name = string("transpose_139")];
|
| 202 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = x_51_cast_fp16)[name = string("transpose_140")];
|
| 203 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_85, query = transpose_84, value = transpose_86)[name = string("attention_output_17_cast_fp16")];
|
| 204 |
+
tensor<int32, [4]> attention_output_19_perm_0 = const()[name = string("attention_output_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 205 |
+
tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 206 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_19_cast_fp16 = transpose(perm = attention_output_19_perm_0, x = attention_output_17_cast_fp16)[name = string("transpose_137")];
|
| 207 |
+
tensor<fp16, [1, 128, 768]> input_57_cast_fp16 = reshape(shape = var_467, x = attention_output_19_cast_fp16)[name = string("input_57_cast_fp16")];
|
| 208 |
+
tensor<fp16, [1, 128, 768]> linear_28_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_28_cast_fp16")];
|
| 209 |
+
tensor<fp16, [1, 128, 768]> input_59_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_28_cast_fp16)[name = string("input_59_cast_fp16")];
|
| 210 |
+
tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 211 |
+
tensor<fp16, [1, 128, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")];
|
| 212 |
+
tensor<fp16, [1, 128, 2048]> linear_29_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_61_cast_fp16)[name = string("linear_29_cast_fp16")];
|
| 213 |
+
string input_65_mode_0 = const()[name = string("input_65_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 214 |
+
tensor<fp16, [1, 128, 2048]> input_65_cast_fp16 = gelu(mode = input_65_mode_0, x = linear_29_cast_fp16)[name = string("input_65_cast_fp16")];
|
| 215 |
+
tensor<fp16, [1, 128, 768]> linear_30_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_65_cast_fp16)[name = string("linear_30_cast_fp16")];
|
| 216 |
+
tensor<fp16, [1, 128, 768]> input_67_cast_fp16 = add(x = linear_30_cast_fp16, y = input_61_cast_fp16)[name = string("input_67_cast_fp16")];
|
| 217 |
+
tensor<int32, [1]> hidden_states_11_axes_0 = const()[name = string("hidden_states_11_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 218 |
+
tensor<fp16, [1, 128, 768]> hidden_states_11_cast_fp16 = layer_norm(axes = hidden_states_11_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
|
| 219 |
+
tensor<fp16, [1, 128, 768]> linear_31_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_31_cast_fp16")];
|
| 220 |
+
tensor<int32, [4]> var_518 = const()[name = string("op_518"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 221 |
+
tensor<fp16, [1, 128, 12, 64]> x_63_cast_fp16 = reshape(shape = var_518, x = linear_31_cast_fp16)[name = string("x_63_cast_fp16")];
|
| 222 |
+
tensor<fp16, [1, 128, 768]> linear_32_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_32_cast_fp16")];
|
| 223 |
+
tensor<int32, [4]> var_527 = const()[name = string("op_527"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 224 |
+
tensor<fp16, [1, 128, 12, 64]> x_67_cast_fp16 = reshape(shape = var_527, x = linear_32_cast_fp16)[name = string("x_67_cast_fp16")];
|
| 225 |
+
tensor<fp16, [1, 128, 768]> linear_33_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_33_cast_fp16")];
|
| 226 |
+
tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 227 |
+
tensor<fp16, [1, 128, 12, 64]> x_71_cast_fp16 = reshape(shape = var_536, x = linear_33_cast_fp16)[name = string("x_71_cast_fp16")];
|
| 228 |
+
tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 229 |
+
tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 230 |
+
tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 231 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_89 = transpose(perm = transpose_89_perm_0, x = x_71_cast_fp16)[name = string("transpose_134")];
|
| 232 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = x_67_cast_fp16)[name = string("transpose_135")];
|
| 233 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_87 = transpose(perm = transpose_87_perm_0, x = x_63_cast_fp16)[name = string("transpose_136")];
|
| 234 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_88, query = transpose_87, value = transpose_89)[name = string("attention_output_21_cast_fp16")];
|
| 235 |
+
tensor<int32, [4]> attention_output_23_perm_0 = const()[name = string("attention_output_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 236 |
+
tensor<int32, [3]> var_542 = const()[name = string("op_542"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 237 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_23_cast_fp16 = transpose(perm = attention_output_23_perm_0, x = attention_output_21_cast_fp16)[name = string("transpose_133")];
|
| 238 |
+
tensor<fp16, [1, 128, 768]> input_69_cast_fp16 = reshape(shape = var_542, x = attention_output_23_cast_fp16)[name = string("input_69_cast_fp16")];
|
| 239 |
+
tensor<fp16, [1, 128, 768]> linear_34_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_34_cast_fp16")];
|
| 240 |
+
tensor<fp16, [1, 128, 768]> input_71_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = linear_34_cast_fp16)[name = string("input_71_cast_fp16")];
|
| 241 |
+
tensor<int32, [1]> input_73_axes_0 = const()[name = string("input_73_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 242 |
+
tensor<fp16, [1, 128, 768]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("input_73_cast_fp16")];
|
| 243 |
+
tensor<fp16, [1, 128, 2048]> linear_35_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_73_cast_fp16)[name = string("linear_35_cast_fp16")];
|
| 244 |
+
string input_77_mode_0 = const()[name = string("input_77_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 245 |
+
tensor<fp16, [1, 128, 2048]> input_77_cast_fp16 = gelu(mode = input_77_mode_0, x = linear_35_cast_fp16)[name = string("input_77_cast_fp16")];
|
| 246 |
+
tensor<fp16, [1, 128, 768]> linear_36_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_77_cast_fp16)[name = string("linear_36_cast_fp16")];
|
| 247 |
+
tensor<fp16, [1, 128, 768]> input_79_cast_fp16 = add(x = linear_36_cast_fp16, y = input_73_cast_fp16)[name = string("input_79_cast_fp16")];
|
| 248 |
+
tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 249 |
+
tensor<fp16, [1, 128, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_79_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
|
| 250 |
+
tensor<fp16, [1, 128, 768]> linear_37_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_37_cast_fp16")];
|
| 251 |
+
tensor<int32, [4]> var_593 = const()[name = string("op_593"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 252 |
+
tensor<fp16, [1, 128, 12, 64]> x_75_cast_fp16 = reshape(shape = var_593, x = linear_37_cast_fp16)[name = string("x_75_cast_fp16")];
|
| 253 |
+
tensor<fp16, [1, 128, 768]> linear_38_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_38_cast_fp16")];
|
| 254 |
+
tensor<int32, [4]> var_602 = const()[name = string("op_602"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 255 |
+
tensor<fp16, [1, 128, 12, 64]> x_79_cast_fp16 = reshape(shape = var_602, x = linear_38_cast_fp16)[name = string("x_79_cast_fp16")];
|
| 256 |
+
tensor<fp16, [1, 128, 768]> linear_39_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_39_cast_fp16")];
|
| 257 |
+
tensor<int32, [4]> var_611 = const()[name = string("op_611"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 258 |
+
tensor<fp16, [1, 128, 12, 64]> x_83_cast_fp16 = reshape(shape = var_611, x = linear_39_cast_fp16)[name = string("x_83_cast_fp16")];
|
| 259 |
+
tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 260 |
+
tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 261 |
+
tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 262 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = x_83_cast_fp16)[name = string("transpose_130")];
|
| 263 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_91 = transpose(perm = transpose_91_perm_0, x = x_79_cast_fp16)[name = string("transpose_131")];
|
| 264 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = x_75_cast_fp16)[name = string("transpose_132")];
|
| 265 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_91, query = transpose_90, value = transpose_92)[name = string("attention_output_25_cast_fp16")];
|
| 266 |
+
tensor<int32, [4]> attention_output_27_perm_0 = const()[name = string("attention_output_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 267 |
+
tensor<int32, [3]> var_617 = const()[name = string("op_617"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 268 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_27_cast_fp16 = transpose(perm = attention_output_27_perm_0, x = attention_output_25_cast_fp16)[name = string("transpose_129")];
|
| 269 |
+
tensor<fp16, [1, 128, 768]> input_81_cast_fp16 = reshape(shape = var_617, x = attention_output_27_cast_fp16)[name = string("input_81_cast_fp16")];
|
| 270 |
+
tensor<fp16, [1, 128, 768]> linear_40_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_40_cast_fp16")];
|
| 271 |
+
tensor<fp16, [1, 128, 768]> input_83_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = linear_40_cast_fp16)[name = string("input_83_cast_fp16")];
|
| 272 |
+
tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 273 |
+
tensor<fp16, [1, 128, 768]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("input_85_cast_fp16")];
|
| 274 |
+
tensor<fp16, [1, 128, 2048]> linear_41_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_85_cast_fp16)[name = string("linear_41_cast_fp16")];
|
| 275 |
+
string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 276 |
+
tensor<fp16, [1, 128, 2048]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = linear_41_cast_fp16)[name = string("input_89_cast_fp16")];
|
| 277 |
+
tensor<fp16, [1, 128, 768]> linear_42_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_89_cast_fp16)[name = string("linear_42_cast_fp16")];
|
| 278 |
+
tensor<fp16, [1, 128, 768]> input_91_cast_fp16 = add(x = linear_42_cast_fp16, y = input_85_cast_fp16)[name = string("input_91_cast_fp16")];
|
| 279 |
+
tensor<int32, [1]> hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 280 |
+
tensor<fp16, [1, 128, 768]> hidden_states_15_cast_fp16 = layer_norm(axes = hidden_states_15_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_91_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
|
| 281 |
+
tensor<fp16, [1, 128, 768]> linear_43_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_43_cast_fp16")];
|
| 282 |
+
tensor<int32, [4]> var_668 = const()[name = string("op_668"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 283 |
+
tensor<fp16, [1, 128, 12, 64]> x_87_cast_fp16 = reshape(shape = var_668, x = linear_43_cast_fp16)[name = string("x_87_cast_fp16")];
|
| 284 |
+
tensor<fp16, [1, 128, 768]> linear_44_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_44_cast_fp16")];
|
| 285 |
+
tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 286 |
+
tensor<fp16, [1, 128, 12, 64]> x_91_cast_fp16 = reshape(shape = var_677, x = linear_44_cast_fp16)[name = string("x_91_cast_fp16")];
|
| 287 |
+
tensor<fp16, [1, 128, 768]> linear_45_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_45_cast_fp16")];
|
| 288 |
+
tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 289 |
+
tensor<fp16, [1, 128, 12, 64]> x_95_cast_fp16 = reshape(shape = var_686, x = linear_45_cast_fp16)[name = string("x_95_cast_fp16")];
|
| 290 |
+
tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 291 |
+
tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 292 |
+
tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 293 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_95 = transpose(perm = transpose_95_perm_0, x = x_95_cast_fp16)[name = string("transpose_126")];
|
| 294 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = x_91_cast_fp16)[name = string("transpose_127")];
|
| 295 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_93 = transpose(perm = transpose_93_perm_0, x = x_87_cast_fp16)[name = string("transpose_128")];
|
| 296 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_94, query = transpose_93, value = transpose_95)[name = string("attention_output_29_cast_fp16")];
|
| 297 |
+
tensor<int32, [4]> attention_output_31_perm_0 = const()[name = string("attention_output_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 298 |
+
tensor<int32, [3]> var_692 = const()[name = string("op_692"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 299 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_31_cast_fp16 = transpose(perm = attention_output_31_perm_0, x = attention_output_29_cast_fp16)[name = string("transpose_125")];
|
| 300 |
+
tensor<fp16, [1, 128, 768]> input_93_cast_fp16 = reshape(shape = var_692, x = attention_output_31_cast_fp16)[name = string("input_93_cast_fp16")];
|
| 301 |
+
tensor<fp16, [1, 128, 768]> linear_46_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_46_cast_fp16")];
|
| 302 |
+
tensor<fp16, [1, 128, 768]> input_95_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = linear_46_cast_fp16)[name = string("input_95_cast_fp16")];
|
| 303 |
+
tensor<int32, [1]> input_97_axes_0 = const()[name = string("input_97_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 304 |
+
tensor<fp16, [1, 128, 768]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_95_cast_fp16)[name = string("input_97_cast_fp16")];
|
| 305 |
+
tensor<fp16, [1, 128, 2048]> linear_47_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_97_cast_fp16)[name = string("linear_47_cast_fp16")];
|
| 306 |
+
string input_101_mode_0 = const()[name = string("input_101_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 307 |
+
tensor<fp16, [1, 128, 2048]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = linear_47_cast_fp16)[name = string("input_101_cast_fp16")];
|
| 308 |
+
tensor<fp16, [1, 128, 768]> linear_48_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_101_cast_fp16)[name = string("linear_48_cast_fp16")];
|
| 309 |
+
tensor<fp16, [1, 128, 768]> input_103_cast_fp16 = add(x = linear_48_cast_fp16, y = input_97_cast_fp16)[name = string("input_103_cast_fp16")];
|
| 310 |
+
tensor<int32, [1]> hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 311 |
+
tensor<fp16, [1, 128, 768]> hidden_states_17_cast_fp16 = layer_norm(axes = hidden_states_17_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
|
| 312 |
+
tensor<fp16, [1, 128, 768]> linear_49_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_49_cast_fp16")];
|
| 313 |
+
tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 314 |
+
tensor<fp16, [1, 128, 12, 64]> x_99_cast_fp16 = reshape(shape = var_743, x = linear_49_cast_fp16)[name = string("x_99_cast_fp16")];
|
| 315 |
+
tensor<fp16, [1, 128, 768]> linear_50_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_50_cast_fp16")];
|
| 316 |
+
tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 317 |
+
tensor<fp16, [1, 128, 12, 64]> x_103_cast_fp16 = reshape(shape = var_752, x = linear_50_cast_fp16)[name = string("x_103_cast_fp16")];
|
| 318 |
+
tensor<fp16, [1, 128, 768]> linear_51_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_51_cast_fp16")];
|
| 319 |
+
tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 320 |
+
tensor<fp16, [1, 128, 12, 64]> x_107_cast_fp16 = reshape(shape = var_761, x = linear_51_cast_fp16)[name = string("x_107_cast_fp16")];
|
| 321 |
+
tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 322 |
+
tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 323 |
+
tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 324 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_98 = transpose(perm = transpose_98_perm_0, x = x_107_cast_fp16)[name = string("transpose_122")];
|
| 325 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_97 = transpose(perm = transpose_97_perm_0, x = x_103_cast_fp16)[name = string("transpose_123")];
|
| 326 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = x_99_cast_fp16)[name = string("transpose_124")];
|
| 327 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_97, query = transpose_96, value = transpose_98)[name = string("attention_output_33_cast_fp16")];
|
| 328 |
+
tensor<int32, [4]> attention_output_35_perm_0 = const()[name = string("attention_output_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 329 |
+
tensor<int32, [3]> var_767 = const()[name = string("op_767"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 330 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_35_cast_fp16 = transpose(perm = attention_output_35_perm_0, x = attention_output_33_cast_fp16)[name = string("transpose_121")];
|
| 331 |
+
tensor<fp16, [1, 128, 768]> input_105_cast_fp16 = reshape(shape = var_767, x = attention_output_35_cast_fp16)[name = string("input_105_cast_fp16")];
|
| 332 |
+
tensor<fp16, [1, 128, 768]> linear_52_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_52_cast_fp16")];
|
| 333 |
+
tensor<fp16, [1, 128, 768]> input_107_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_52_cast_fp16)[name = string("input_107_cast_fp16")];
|
| 334 |
+
tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 335 |
+
tensor<fp16, [1, 128, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
|
| 336 |
+
tensor<fp16, [1, 128, 2048]> linear_53_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_109_cast_fp16)[name = string("linear_53_cast_fp16")];
|
| 337 |
+
string input_113_mode_0 = const()[name = string("input_113_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 338 |
+
tensor<fp16, [1, 128, 2048]> input_113_cast_fp16 = gelu(mode = input_113_mode_0, x = linear_53_cast_fp16)[name = string("input_113_cast_fp16")];
|
| 339 |
+
tensor<fp16, [1, 128, 768]> linear_54_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_113_cast_fp16)[name = string("linear_54_cast_fp16")];
|
| 340 |
+
tensor<fp16, [1, 128, 768]> input_115_cast_fp16 = add(x = linear_54_cast_fp16, y = input_109_cast_fp16)[name = string("input_115_cast_fp16")];
|
| 341 |
+
tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 342 |
+
tensor<fp16, [1, 128, 768]> hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_115_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
|
| 343 |
+
tensor<fp16, [1, 128, 768]> linear_55_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_55_cast_fp16")];
|
| 344 |
+
tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 345 |
+
tensor<fp16, [1, 128, 12, 64]> x_111_cast_fp16 = reshape(shape = var_818, x = linear_55_cast_fp16)[name = string("x_111_cast_fp16")];
|
| 346 |
+
tensor<fp16, [1, 128, 768]> linear_56_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_56_cast_fp16")];
|
| 347 |
+
tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 348 |
+
tensor<fp16, [1, 128, 12, 64]> x_115_cast_fp16 = reshape(shape = var_827, x = linear_56_cast_fp16)[name = string("x_115_cast_fp16")];
|
| 349 |
+
tensor<fp16, [1, 128, 768]> linear_57_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_57_cast_fp16")];
|
| 350 |
+
tensor<int32, [4]> var_836 = const()[name = string("op_836"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 351 |
+
tensor<fp16, [1, 128, 12, 64]> x_119_cast_fp16 = reshape(shape = var_836, x = linear_57_cast_fp16)[name = string("x_119_cast_fp16")];
|
| 352 |
+
tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 353 |
+
tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 354 |
+
tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 355 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_101 = transpose(perm = transpose_101_perm_0, x = x_119_cast_fp16)[name = string("transpose_118")];
|
| 356 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = x_115_cast_fp16)[name = string("transpose_119")];
|
| 357 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_99 = transpose(perm = transpose_99_perm_0, x = x_111_cast_fp16)[name = string("transpose_120")];
|
| 358 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_100, query = transpose_99, value = transpose_101)[name = string("attention_output_37_cast_fp16")];
|
| 359 |
+
tensor<int32, [4]> attention_output_39_perm_0 = const()[name = string("attention_output_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 360 |
+
tensor<int32, [3]> var_842 = const()[name = string("op_842"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 361 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_39_cast_fp16 = transpose(perm = attention_output_39_perm_0, x = attention_output_37_cast_fp16)[name = string("transpose_117")];
|
| 362 |
+
tensor<fp16, [1, 128, 768]> input_117_cast_fp16 = reshape(shape = var_842, x = attention_output_39_cast_fp16)[name = string("input_117_cast_fp16")];
|
| 363 |
+
tensor<fp16, [1, 128, 768]> linear_58_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_58_cast_fp16")];
|
| 364 |
+
tensor<fp16, [1, 128, 768]> input_119_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_58_cast_fp16)[name = string("input_119_cast_fp16")];
|
| 365 |
+
tensor<int32, [1]> input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 366 |
+
tensor<fp16, [1, 128, 768]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("input_121_cast_fp16")];
|
| 367 |
+
tensor<fp16, [1, 128, 2048]> linear_59_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_121_cast_fp16)[name = string("linear_59_cast_fp16")];
|
| 368 |
+
string input_125_mode_0 = const()[name = string("input_125_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 369 |
+
tensor<fp16, [1, 128, 2048]> input_125_cast_fp16 = gelu(mode = input_125_mode_0, x = linear_59_cast_fp16)[name = string("input_125_cast_fp16")];
|
| 370 |
+
tensor<fp16, [1, 128, 768]> linear_60_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_125_cast_fp16)[name = string("linear_60_cast_fp16")];
|
| 371 |
+
tensor<fp16, [1, 128, 768]> input_127_cast_fp16 = add(x = linear_60_cast_fp16, y = input_121_cast_fp16)[name = string("input_127_cast_fp16")];
|
| 372 |
+
tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 373 |
+
tensor<fp16, [1, 128, 768]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_127_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
|
| 374 |
+
tensor<fp16, [1, 128, 768]> linear_61_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_61_cast_fp16")];
|
| 375 |
+
tensor<int32, [4]> var_893 = const()[name = string("op_893"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 376 |
+
tensor<fp16, [1, 128, 12, 64]> x_123_cast_fp16 = reshape(shape = var_893, x = linear_61_cast_fp16)[name = string("x_123_cast_fp16")];
|
| 377 |
+
tensor<fp16, [1, 128, 768]> linear_62_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_62_cast_fp16")];
|
| 378 |
+
tensor<int32, [4]> var_902 = const()[name = string("op_902"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 379 |
+
tensor<fp16, [1, 128, 12, 64]> x_127_cast_fp16 = reshape(shape = var_902, x = linear_62_cast_fp16)[name = string("x_127_cast_fp16")];
|
| 380 |
+
tensor<fp16, [1, 128, 768]> linear_63_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_63_cast_fp16")];
|
| 381 |
+
tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 382 |
+
tensor<fp16, [1, 128, 12, 64]> x_131_cast_fp16 = reshape(shape = var_911, x = linear_63_cast_fp16)[name = string("x_131_cast_fp16")];
|
| 383 |
+
tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 384 |
+
tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 385 |
+
tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 386 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = x_131_cast_fp16)[name = string("transpose_114")];
|
| 387 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_103 = transpose(perm = transpose_103_perm_0, x = x_127_cast_fp16)[name = string("transpose_115")];
|
| 388 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_102 = transpose(perm = transpose_102_perm_0, x = x_123_cast_fp16)[name = string("transpose_116")];
|
| 389 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_103, query = transpose_102, value = transpose_104)[name = string("attention_output_41_cast_fp16")];
|
| 390 |
+
tensor<int32, [4]> attention_output_43_perm_0 = const()[name = string("attention_output_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 391 |
+
tensor<int32, [3]> var_917 = const()[name = string("op_917"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 392 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_43_cast_fp16 = transpose(perm = attention_output_43_perm_0, x = attention_output_41_cast_fp16)[name = string("transpose_113")];
|
| 393 |
+
tensor<fp16, [1, 128, 768]> input_129_cast_fp16 = reshape(shape = var_917, x = attention_output_43_cast_fp16)[name = string("input_129_cast_fp16")];
|
| 394 |
+
tensor<fp16, [1, 128, 768]> linear_64_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_64_cast_fp16")];
|
| 395 |
+
tensor<fp16, [1, 128, 768]> input_131_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = linear_64_cast_fp16)[name = string("input_131_cast_fp16")];
|
| 396 |
+
tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 397 |
+
tensor<fp16, [1, 128, 768]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_131_cast_fp16)[name = string("input_133_cast_fp16")];
|
| 398 |
+
tensor<fp16, [1, 128, 2048]> linear_65_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_133_cast_fp16)[name = string("linear_65_cast_fp16")];
|
| 399 |
+
string input_137_mode_0 = const()[name = string("input_137_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 400 |
+
tensor<fp16, [1, 128, 2048]> input_137_cast_fp16 = gelu(mode = input_137_mode_0, x = linear_65_cast_fp16)[name = string("input_137_cast_fp16")];
|
| 401 |
+
tensor<fp16, [1, 128, 768]> linear_66_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_137_cast_fp16)[name = string("linear_66_cast_fp16")];
|
| 402 |
+
tensor<fp16, [1, 128, 768]> input_139_cast_fp16 = add(x = linear_66_cast_fp16, y = input_133_cast_fp16)[name = string("input_139_cast_fp16")];
|
| 403 |
+
tensor<int32, [1]> hidden_states_axes_0 = const()[name = string("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 404 |
+
tensor<fp16, [1, 128, 768]> hidden_states_cast_fp16 = layer_norm(axes = hidden_states_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_139_cast_fp16)[name = string("hidden_states_cast_fp16")];
|
| 405 |
+
tensor<fp16, [1, 128, 768]> linear_67_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_67_cast_fp16")];
|
| 406 |
+
tensor<int32, [4]> var_968 = const()[name = string("op_968"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 407 |
+
tensor<fp16, [1, 128, 12, 64]> x_135_cast_fp16 = reshape(shape = var_968, x = linear_67_cast_fp16)[name = string("x_135_cast_fp16")];
|
| 408 |
+
tensor<fp16, [1, 128, 768]> linear_68_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_68_cast_fp16")];
|
| 409 |
+
tensor<int32, [4]> var_977 = const()[name = string("op_977"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 410 |
+
tensor<fp16, [1, 128, 12, 64]> x_139_cast_fp16 = reshape(shape = var_977, x = linear_68_cast_fp16)[name = string("x_139_cast_fp16")];
|
| 411 |
+
tensor<fp16, [1, 128, 768]> linear_69_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_69_cast_fp16")];
|
| 412 |
+
tensor<int32, [4]> var_986 = const()[name = string("op_986"), val = tensor<int32, [4]>([1, 128, 12, 64])];
|
| 413 |
+
tensor<fp16, [1, 128, 12, 64]> x_cast_fp16 = reshape(shape = var_986, x = linear_69_cast_fp16)[name = string("x_cast_fp16")];
|
| 414 |
+
tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 415 |
+
tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 416 |
+
tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 417 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_107 = transpose(perm = transpose_107_perm_0, x = x_cast_fp16)[name = string("transpose_110")];
|
| 418 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_106 = transpose(perm = transpose_106_perm_0, x = x_139_cast_fp16)[name = string("transpose_111")];
|
| 419 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_105 = transpose(perm = transpose_105_perm_0, x = x_135_cast_fp16)[name = string("transpose_112")];
|
| 420 |
+
tensor<fp16, [1, 12, 128, 64]> attention_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_106, query = transpose_105, value = transpose_107)[name = string("attention_output_45_cast_fp16")];
|
| 421 |
+
tensor<int32, [4]> attention_output_perm_0 = const()[name = string("attention_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 422 |
+
tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([1, 128, 768])];
|
| 423 |
+
tensor<fp16, [1, 128, 12, 64]> attention_output_cast_fp16 = transpose(perm = attention_output_perm_0, x = attention_output_45_cast_fp16)[name = string("transpose_109")];
|
| 424 |
+
tensor<fp16, [1, 128, 768]> input_141_cast_fp16 = reshape(shape = var_992, x = attention_output_cast_fp16)[name = string("input_141_cast_fp16")];
|
| 425 |
+
tensor<fp16, [1, 128, 768]> linear_70_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_70_cast_fp16")];
|
| 426 |
+
tensor<fp16, [1, 128, 768]> input_143_cast_fp16 = add(x = hidden_states_cast_fp16, y = linear_70_cast_fp16)[name = string("input_143_cast_fp16")];
|
| 427 |
+
tensor<int32, [1]> input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 428 |
+
tensor<fp16, [1, 128, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")];
|
| 429 |
+
tensor<fp16, [1, 128, 2048]> linear_71_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_145_cast_fp16)[name = string("linear_71_cast_fp16")];
|
| 430 |
+
string input_149_mode_0 = const()[name = string("input_149_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 431 |
+
tensor<fp16, [1, 128, 2048]> input_149_cast_fp16 = gelu(mode = input_149_mode_0, x = linear_71_cast_fp16)[name = string("input_149_cast_fp16")];
|
| 432 |
+
tensor<fp16, [1, 128, 768]> linear_72_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_149_cast_fp16)[name = string("linear_72_cast_fp16")];
|
| 433 |
+
tensor<fp16, [1, 128, 768]> input_151_cast_fp16 = add(x = linear_72_cast_fp16, y = input_145_cast_fp16)[name = string("input_151_cast_fp16")];
|
| 434 |
+
tensor<int32, [1]> sequence_output_axes_0 = const()[name = string("sequence_output_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 435 |
+
tensor<fp16, [1, 128, 768]> sequence_output = layer_norm(axes = sequence_output_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_151_cast_fp16)[name = string("sequence_output_cast_fp16")];
|
| 436 |
+
tensor<fp16, [512, 768]> bert_encoder_weight_to_fp16 = const()[name = string("bert_encoder_weight_to_fp16"), val = tensor<fp16, [512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11339264)))];
|
| 437 |
+
tensor<fp16, [512]> bert_encoder_bias_to_fp16 = const()[name = string("bert_encoder_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12125760)))];
|
| 438 |
+
tensor<fp16, [1, 128, 512]> linear_73_cast_fp16 = linear(bias = bert_encoder_bias_to_fp16, weight = bert_encoder_weight_to_fp16, x = sequence_output)[name = string("linear_73_cast_fp16")];
|
| 439 |
+
tensor<int32, [3]> var_1030_perm_0 = const()[name = string("op_1030_perm_0"), val = tensor<int32, [3]>([0, -1, -2])];
|
| 440 |
+
tensor<fp16, [1, 512, 128]> var_1030 = transpose(perm = var_1030_perm_0, x = linear_73_cast_fp16)[name = string("transpose_108")];
|
| 441 |
+
} -> (sequence_output, var_1030);
|
| 442 |
+
}
|
iteration_3/compiled/bert_fp16_t128.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ff3ca8fac0332427ddfe5e78954382359d26516284113001a7484b60455eb10
|
| 3 |
+
size 12126848
|
iteration_3/compiled/bert_fp16_t256.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdd98e544fdd1d3002b708faa81092ba37b68369dfedee5cb0152a8340b68bdc
|
| 3 |
+
size 243
|
iteration_3/compiled/bert_fp16_t256.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2aba3b0f5fbc0614df555dcbfd939baa534dece2cf898b67db6a99169e55875a
|
| 3 |
+
size 441
|
iteration_3/compiled/bert_fp16_t256.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 256 × 768)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 256, 768]",
|
| 13 |
+
"name" : "sequence_output",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Float16",
|
| 20 |
+
"formattedType" : "MultiArray (Float16 1 × 512 × 256)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1, 512, 256]",
|
| 23 |
+
"name" : "var_1030",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
}
|
| 26 |
+
],
|
| 27 |
+
"modelParameters" : [
|
| 28 |
+
|
| 29 |
+
],
|
| 30 |
+
"specificationVersion" : 9,
|
| 31 |
+
"mlProgramOperationTypeHistogram" : {
|
| 32 |
+
"Ios18.linear" : 74,
|
| 33 |
+
"Ios18.scaledDotProductAttention" : 12,
|
| 34 |
+
"Ios18.sub" : 1,
|
| 35 |
+
"Select" : 2,
|
| 36 |
+
"Ios18.expandDims" : 2,
|
| 37 |
+
"Ios18.gelu" : 12,
|
| 38 |
+
"Ios18.gather" : 1,
|
| 39 |
+
"Ios18.add" : 27,
|
| 40 |
+
"Tile" : 1,
|
| 41 |
+
"Ios18.layerNorm" : 25,
|
| 42 |
+
"Ios18.transpose" : 49,
|
| 43 |
+
"Ios18.cast" : 5,
|
| 44 |
+
"Ios18.reshape" : 48,
|
| 45 |
+
"Ios18.greaterEqual" : 1
|
| 46 |
+
},
|
| 47 |
+
"computePrecision" : "Mixed (Float16, Int16, Int32)",
|
| 48 |
+
"isUpdatable" : "0",
|
| 49 |
+
"stateSchema" : [
|
| 50 |
+
|
| 51 |
+
],
|
| 52 |
+
"availability" : {
|
| 53 |
+
"macOS" : "15.0",
|
| 54 |
+
"tvOS" : "18.0",
|
| 55 |
+
"visionOS" : "2.0",
|
| 56 |
+
"watchOS" : "11.0",
|
| 57 |
+
"iOS" : "18.0",
|
| 58 |
+
"macCatalyst" : "18.0"
|
| 59 |
+
},
|
| 60 |
+
"modelType" : {
|
| 61 |
+
"name" : "MLModelType_mlProgram"
|
| 62 |
+
},
|
| 63 |
+
"userDefinedMetadata" : {
|
| 64 |
+
"com.github.apple.coremltools.conversion_date" : "2026-05-08",
|
| 65 |
+
"com.github.apple.coremltools.source" : "torch==2.11.0",
|
| 66 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 67 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 68 |
+
},
|
| 69 |
+
"inputSchema" : [
|
| 70 |
+
{
|
| 71 |
+
"hasShapeFlexibility" : "0",
|
| 72 |
+
"isOptional" : "0",
|
| 73 |
+
"dataType" : "Int32",
|
| 74 |
+
"formattedType" : "MultiArray (Int32 1 × 256)",
|
| 75 |
+
"shortDescription" : "",
|
| 76 |
+
"shape" : "[1, 256]",
|
| 77 |
+
"name" : "tokens",
|
| 78 |
+
"type" : "MultiArray"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"hasShapeFlexibility" : "0",
|
| 82 |
+
"isOptional" : "0",
|
| 83 |
+
"dataType" : "Int32",
|
| 84 |
+
"formattedType" : "MultiArray (Int32 1 × 256)",
|
| 85 |
+
"shortDescription" : "",
|
| 86 |
+
"shape" : "[1, 256]",
|
| 87 |
+
"name" : "attention_mask",
|
| 88 |
+
"type" : "MultiArray"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"generatedClassName" : "bert_fp16_t256",
|
| 92 |
+
"method" : "predict"
|
| 93 |
+
}
|
| 94 |
+
]
|
iteration_3/compiled/bert_fp16_t256.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.3)
|
| 2 |
+
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios18>(tensor<int32, [1, 256]> attention_mask, tensor<int32, [1, 256]> tokens) {
|
| 5 |
+
int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
|
| 6 |
+
bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
|
| 7 |
+
tensor<fp16, [178, 128]> bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = string("bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [178, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
|
| 8 |
+
string tokens_to_int16_dtype_0 = const()[name = string("tokens_to_int16_dtype_0"), val = string("int16")];
|
| 9 |
+
string cast_53_dtype_0 = const()[name = string("cast_53_dtype_0"), val = string("int32")];
|
| 10 |
+
int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
|
| 11 |
+
tensor<int16, [1, 256]> tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = string("cast_58")];
|
| 12 |
+
tensor<int32, [1, 256]> cast_53 = cast(dtype = cast_53_dtype_0, x = tokens_to_int16)[name = string("cast_57")];
|
| 13 |
+
tensor<bool, [1, 256]> greater_equal_0 = greater_equal(x = cast_53, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
|
| 14 |
+
int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(178)];
|
| 15 |
+
tensor<int32, [1, 256]> add_0 = add(x = cast_53, y = slice_by_index_0)[name = string("add_0")];
|
| 16 |
+
tensor<int32, [1, 256]> select_0 = select(a = cast_53, b = add_0, cond = greater_equal_0)[name = string("select_0")];
|
| 17 |
+
int32 inputs_embeds_cast_fp16_cast_uint16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
|
| 18 |
+
string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
|
| 19 |
+
tensor<int16, [1, 256]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_56")];
|
| 20 |
+
tensor<fp16, [1, 256, 128]> inputs_embeds_cast_fp16_cast_uint16_cast_uint16 = gather(axis = inputs_embeds_cast_fp16_cast_uint16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_to_int16, validate_indices = inputs_embeds_validate_indices_0, x = bert_embeddings_word_embeddings_weight_to_fp16)[name = string("inputs_embeds_cast_fp16_cast_uint16_cast_uint16")];
|
| 21 |
+
tensor<fp16, [1, 256, 128]> token_type_embeddings_1_to_fp16 = const()[name = string("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 256, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45696)))];
|
| 22 |
+
tensor<fp16, [1, 256, 128]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16_cast_uint16_cast_uint16, y = token_type_embeddings_1_to_fp16)[name = string("embeddings_1_cast_fp16")];
|
| 23 |
+
tensor<fp16, [1, 256, 128]> position_embeddings_1_to_fp16 = const()[name = string("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 256, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111296)))];
|
| 24 |
+
tensor<fp16, [1, 256, 128]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = string("input_5_cast_fp16")];
|
| 25 |
+
tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 26 |
+
tensor<fp16, [128]> bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176896)))];
|
| 27 |
+
tensor<fp16, [128]> bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177216)))];
|
| 28 |
+
fp16 var_34_to_fp16 = const()[name = string("op_34_to_fp16"), val = fp16(0x1p-24)];
|
| 29 |
+
tensor<fp16, [1, 256, 128]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_34_to_fp16, gamma = bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
|
| 30 |
+
tensor<int32, [1]> var_79_axes_0 = const()[name = string("op_79_axes_0"), val = tensor<int32, [1]>([1])];
|
| 31 |
+
tensor<int32, [1, 1, 256]> var_79 = expand_dims(axes = var_79_axes_0, x = attention_mask)[name = string("op_79")];
|
| 32 |
+
tensor<int32, [1]> var_81_axes_0 = const()[name = string("op_81_axes_0"), val = tensor<int32, [1]>([2])];
|
| 33 |
+
tensor<int32, [1, 1, 1, 256]> var_81 = expand_dims(axes = var_81_axes_0, x = var_79)[name = string("op_81")];
|
| 34 |
+
tensor<int32, [4]> var_90_reps_0 = const()[name = string("op_90_reps_0"), val = tensor<int32, [4]>([1, 1, 256, 1])];
|
| 35 |
+
tensor<int32, [1, 1, 256, 256]> var_90 = tile(reps = var_90_reps_0, x = var_81)[name = string("op_90")];
|
| 36 |
+
fp16 var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = fp16(0x1p+0)];
|
| 37 |
+
string var_95_to_fp16_dtype_0 = const()[name = string("op_95_to_fp16_dtype_0"), val = string("fp16")];
|
| 38 |
+
tensor<fp16, [1, 1, 256, 256]> var_90_to_fp16 = cast(dtype = var_95_to_fp16_dtype_0, x = var_90)[name = string("cast_55")];
|
| 39 |
+
tensor<fp16, [1, 1, 256, 256]> inverted_mask_cast_fp16 = sub(x = var_96_to_fp16, y = var_90_to_fp16)[name = string("inverted_mask_cast_fp16")];
|
| 40 |
+
string var_103_dtype_0 = const()[name = string("op_103_dtype_0"), val = string("bool")];
|
| 41 |
+
fp16 var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = fp16(-inf)];
|
| 42 |
+
tensor<bool, [1, 1, 256, 256]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_103_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_54")];
|
| 43 |
+
tensor<fp16, [1, 1, 256, 256]> attention_mask_cast_fp16 = select(a = var_104_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
|
| 44 |
+
tensor<fp16, [768, 128]> bert_encoder_embedding_hidden_mapping_in_weight_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177536)))];
|
| 45 |
+
tensor<fp16, [768]> bert_encoder_embedding_hidden_mapping_in_bias_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374208)))];
|
| 46 |
+
tensor<fp16, [1, 256, 768]> linear_0_cast_fp16 = linear(bias = bert_encoder_embedding_hidden_mapping_in_bias_to_fp16, weight = bert_encoder_embedding_hidden_mapping_in_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
|
| 47 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375808)))];
|
| 48 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1555520)))];
|
| 49 |
+
tensor<fp16, [1, 256, 768]> linear_1_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_1_cast_fp16")];
|
| 50 |
+
tensor<int32, [4]> var_143 = const()[name = string("op_143"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 51 |
+
tensor<fp16, [1, 256, 12, 64]> x_3_cast_fp16 = reshape(shape = var_143, x = linear_1_cast_fp16)[name = string("x_3_cast_fp16")];
|
| 52 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1557120)))];
|
| 53 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2736832)))];
|
| 54 |
+
tensor<fp16, [1, 256, 768]> linear_2_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_2_cast_fp16")];
|
| 55 |
+
tensor<int32, [4]> var_152 = const()[name = string("op_152"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 56 |
+
tensor<fp16, [1, 256, 12, 64]> x_7_cast_fp16 = reshape(shape = var_152, x = linear_2_cast_fp16)[name = string("x_7_cast_fp16")];
|
| 57 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2738432)))];
|
| 58 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3918144)))];
|
| 59 |
+
tensor<fp16, [1, 256, 768]> linear_3_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_3_cast_fp16")];
|
| 60 |
+
tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 61 |
+
tensor<fp16, [1, 256, 12, 64]> x_11_cast_fp16 = reshape(shape = var_161, x = linear_3_cast_fp16)[name = string("x_11_cast_fp16")];
|
| 62 |
+
tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 63 |
+
tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 64 |
+
tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 65 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = x_11_cast_fp16)[name = string("transpose_154")];
|
| 66 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_73 = transpose(perm = transpose_73_perm_0, x = x_7_cast_fp16)[name = string("transpose_155")];
|
| 67 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = x_3_cast_fp16)[name = string("transpose_156")];
|
| 68 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_73, query = transpose_72, value = transpose_74)[name = string("attention_output_1_cast_fp16")];
|
| 69 |
+
tensor<int32, [4]> attention_output_3_perm_0 = const()[name = string("attention_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 70 |
+
tensor<int32, [3]> var_167 = const()[name = string("op_167"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 71 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_3_cast_fp16 = transpose(perm = attention_output_3_perm_0, x = attention_output_1_cast_fp16)[name = string("transpose_153")];
|
| 72 |
+
tensor<fp16, [1, 256, 768]> input_9_cast_fp16 = reshape(shape = var_167, x = attention_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
|
| 73 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3919744)))];
|
| 74 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5099456)))];
|
| 75 |
+
tensor<fp16, [1, 256, 768]> linear_4_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_4_cast_fp16")];
|
| 76 |
+
tensor<fp16, [1, 256, 768]> input_11_cast_fp16 = add(x = linear_0_cast_fp16, y = linear_4_cast_fp16)[name = string("input_11_cast_fp16")];
|
| 77 |
+
tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 78 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5101056)))];
|
| 79 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5102656)))];
|
| 80 |
+
fp16 var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = fp16(0x1p-24)];
|
| 81 |
+
tensor<fp16, [1, 256, 768]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")];
|
| 82 |
+
tensor<fp16, [2048, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16"), val = tensor<fp16, [2048, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5104256)))];
|
| 83 |
+
tensor<fp16, [2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8250048)))];
|
| 84 |
+
tensor<fp16, [1, 256, 2048]> linear_5_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_5_cast_fp16")];
|
| 85 |
+
string input_17_mode_0 = const()[name = string("input_17_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 86 |
+
tensor<fp16, [1, 256, 2048]> input_17_cast_fp16 = gelu(mode = input_17_mode_0, x = linear_5_cast_fp16)[name = string("input_17_cast_fp16")];
|
| 87 |
+
tensor<fp16, [768, 2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16"), val = tensor<fp16, [768, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8254208)))];
|
| 88 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11400000)))];
|
| 89 |
+
tensor<fp16, [1, 256, 768]> linear_6_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_6_cast_fp16")];
|
| 90 |
+
tensor<fp16, [1, 256, 768]> input_19_cast_fp16 = add(x = linear_6_cast_fp16, y = input_13_cast_fp16)[name = string("input_19_cast_fp16")];
|
| 91 |
+
tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 92 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11401600)))];
|
| 93 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11403200)))];
|
| 94 |
+
tensor<fp16, [1, 256, 768]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_19_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
|
| 95 |
+
tensor<fp16, [1, 256, 768]> linear_7_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_7_cast_fp16")];
|
| 96 |
+
tensor<int32, [4]> var_218 = const()[name = string("op_218"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 97 |
+
tensor<fp16, [1, 256, 12, 64]> x_15_cast_fp16 = reshape(shape = var_218, x = linear_7_cast_fp16)[name = string("x_15_cast_fp16")];
|
| 98 |
+
tensor<fp16, [1, 256, 768]> linear_8_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_8_cast_fp16")];
|
| 99 |
+
tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 100 |
+
tensor<fp16, [1, 256, 12, 64]> x_19_cast_fp16 = reshape(shape = var_227, x = linear_8_cast_fp16)[name = string("x_19_cast_fp16")];
|
| 101 |
+
tensor<fp16, [1, 256, 768]> linear_9_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_9_cast_fp16")];
|
| 102 |
+
tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 103 |
+
tensor<fp16, [1, 256, 12, 64]> x_23_cast_fp16 = reshape(shape = var_236, x = linear_9_cast_fp16)[name = string("x_23_cast_fp16")];
|
| 104 |
+
tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 105 |
+
tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 106 |
+
tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 107 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_77 = transpose(perm = transpose_77_perm_0, x = x_23_cast_fp16)[name = string("transpose_150")];
|
| 108 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = x_19_cast_fp16)[name = string("transpose_151")];
|
| 109 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_75 = transpose(perm = transpose_75_perm_0, x = x_15_cast_fp16)[name = string("transpose_152")];
|
| 110 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_76, query = transpose_75, value = transpose_77)[name = string("attention_output_5_cast_fp16")];
|
| 111 |
+
tensor<int32, [4]> attention_output_7_perm_0 = const()[name = string("attention_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 112 |
+
tensor<int32, [3]> var_242 = const()[name = string("op_242"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 113 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_7_cast_fp16 = transpose(perm = attention_output_7_perm_0, x = attention_output_5_cast_fp16)[name = string("transpose_149")];
|
| 114 |
+
tensor<fp16, [1, 256, 768]> input_21_cast_fp16 = reshape(shape = var_242, x = attention_output_7_cast_fp16)[name = string("input_21_cast_fp16")];
|
| 115 |
+
tensor<fp16, [1, 256, 768]> linear_10_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_10_cast_fp16")];
|
| 116 |
+
tensor<fp16, [1, 256, 768]> input_23_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = linear_10_cast_fp16)[name = string("input_23_cast_fp16")];
|
| 117 |
+
tensor<int32, [1]> input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 118 |
+
tensor<fp16, [1, 256, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")];
|
| 119 |
+
tensor<fp16, [1, 256, 2048]> linear_11_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_11_cast_fp16")];
|
| 120 |
+
string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 121 |
+
tensor<fp16, [1, 256, 2048]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = linear_11_cast_fp16)[name = string("input_29_cast_fp16")];
|
| 122 |
+
tensor<fp16, [1, 256, 768]> linear_12_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_29_cast_fp16)[name = string("linear_12_cast_fp16")];
|
| 123 |
+
tensor<fp16, [1, 256, 768]> input_31_cast_fp16 = add(x = linear_12_cast_fp16, y = input_25_cast_fp16)[name = string("input_31_cast_fp16")];
|
| 124 |
+
tensor<int32, [1]> hidden_states_5_axes_0 = const()[name = string("hidden_states_5_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 125 |
+
tensor<fp16, [1, 256, 768]> hidden_states_5_cast_fp16 = layer_norm(axes = hidden_states_5_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
|
| 126 |
+
tensor<fp16, [1, 256, 768]> linear_13_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_13_cast_fp16")];
|
| 127 |
+
tensor<int32, [4]> var_293 = const()[name = string("op_293"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 128 |
+
tensor<fp16, [1, 256, 12, 64]> x_27_cast_fp16 = reshape(shape = var_293, x = linear_13_cast_fp16)[name = string("x_27_cast_fp16")];
|
| 129 |
+
tensor<fp16, [1, 256, 768]> linear_14_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_14_cast_fp16")];
|
| 130 |
+
tensor<int32, [4]> var_302 = const()[name = string("op_302"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 131 |
+
tensor<fp16, [1, 256, 12, 64]> x_31_cast_fp16 = reshape(shape = var_302, x = linear_14_cast_fp16)[name = string("x_31_cast_fp16")];
|
| 132 |
+
tensor<fp16, [1, 256, 768]> linear_15_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_15_cast_fp16")];
|
| 133 |
+
tensor<int32, [4]> var_311 = const()[name = string("op_311"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 134 |
+
tensor<fp16, [1, 256, 12, 64]> x_35_cast_fp16 = reshape(shape = var_311, x = linear_15_cast_fp16)[name = string("x_35_cast_fp16")];
|
| 135 |
+
tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 136 |
+
tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 137 |
+
tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 138 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = x_35_cast_fp16)[name = string("transpose_146")];
|
| 139 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_79 = transpose(perm = transpose_79_perm_0, x = x_31_cast_fp16)[name = string("transpose_147")];
|
| 140 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = x_27_cast_fp16)[name = string("transpose_148")];
|
| 141 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_79, query = transpose_78, value = transpose_80)[name = string("attention_output_9_cast_fp16")];
|
| 142 |
+
tensor<int32, [4]> attention_output_11_perm_0 = const()[name = string("attention_output_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 143 |
+
tensor<int32, [3]> var_317 = const()[name = string("op_317"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 144 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_11_cast_fp16 = transpose(perm = attention_output_11_perm_0, x = attention_output_9_cast_fp16)[name = string("transpose_145")];
|
| 145 |
+
tensor<fp16, [1, 256, 768]> input_33_cast_fp16 = reshape(shape = var_317, x = attention_output_11_cast_fp16)[name = string("input_33_cast_fp16")];
|
| 146 |
+
tensor<fp16, [1, 256, 768]> linear_16_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_16_cast_fp16")];
|
| 147 |
+
tensor<fp16, [1, 256, 768]> input_35_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = linear_16_cast_fp16)[name = string("input_35_cast_fp16")];
|
| 148 |
+
tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 149 |
+
tensor<fp16, [1, 256, 768]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")];
|
| 150 |
+
tensor<fp16, [1, 256, 2048]> linear_17_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_37_cast_fp16)[name = string("linear_17_cast_fp16")];
|
| 151 |
+
string input_41_mode_0 = const()[name = string("input_41_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 152 |
+
tensor<fp16, [1, 256, 2048]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = linear_17_cast_fp16)[name = string("input_41_cast_fp16")];
|
| 153 |
+
tensor<fp16, [1, 256, 768]> linear_18_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_18_cast_fp16")];
|
| 154 |
+
tensor<fp16, [1, 256, 768]> input_43_cast_fp16 = add(x = linear_18_cast_fp16, y = input_37_cast_fp16)[name = string("input_43_cast_fp16")];
|
| 155 |
+
tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 156 |
+
tensor<fp16, [1, 256, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_43_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
|
| 157 |
+
tensor<fp16, [1, 256, 768]> linear_19_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_19_cast_fp16")];
|
| 158 |
+
tensor<int32, [4]> var_368 = const()[name = string("op_368"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 159 |
+
tensor<fp16, [1, 256, 12, 64]> x_39_cast_fp16 = reshape(shape = var_368, x = linear_19_cast_fp16)[name = string("x_39_cast_fp16")];
|
| 160 |
+
tensor<fp16, [1, 256, 768]> linear_20_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_20_cast_fp16")];
|
| 161 |
+
tensor<int32, [4]> var_377 = const()[name = string("op_377"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 162 |
+
tensor<fp16, [1, 256, 12, 64]> x_43_cast_fp16 = reshape(shape = var_377, x = linear_20_cast_fp16)[name = string("x_43_cast_fp16")];
|
| 163 |
+
tensor<fp16, [1, 256, 768]> linear_21_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_21_cast_fp16")];
|
| 164 |
+
tensor<int32, [4]> var_386 = const()[name = string("op_386"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 165 |
+
tensor<fp16, [1, 256, 12, 64]> x_47_cast_fp16 = reshape(shape = var_386, x = linear_21_cast_fp16)[name = string("x_47_cast_fp16")];
|
| 166 |
+
tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 167 |
+
tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 168 |
+
tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 169 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_83 = transpose(perm = transpose_83_perm_0, x = x_47_cast_fp16)[name = string("transpose_142")];
|
| 170 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = x_43_cast_fp16)[name = string("transpose_143")];
|
| 171 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_81 = transpose(perm = transpose_81_perm_0, x = x_39_cast_fp16)[name = string("transpose_144")];
|
| 172 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_82, query = transpose_81, value = transpose_83)[name = string("attention_output_13_cast_fp16")];
|
| 173 |
+
tensor<int32, [4]> attention_output_15_perm_0 = const()[name = string("attention_output_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 174 |
+
tensor<int32, [3]> var_392 = const()[name = string("op_392"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 175 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_15_cast_fp16 = transpose(perm = attention_output_15_perm_0, x = attention_output_13_cast_fp16)[name = string("transpose_141")];
|
| 176 |
+
tensor<fp16, [1, 256, 768]> input_45_cast_fp16 = reshape(shape = var_392, x = attention_output_15_cast_fp16)[name = string("input_45_cast_fp16")];
|
| 177 |
+
tensor<fp16, [1, 256, 768]> linear_22_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_22_cast_fp16")];
|
| 178 |
+
tensor<fp16, [1, 256, 768]> input_47_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = linear_22_cast_fp16)[name = string("input_47_cast_fp16")];
|
| 179 |
+
tensor<int32, [1]> input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 180 |
+
tensor<fp16, [1, 256, 768]> input_49_cast_fp16 = layer_norm(axes = input_49_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
|
| 181 |
+
tensor<fp16, [1, 256, 2048]> linear_23_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_49_cast_fp16)[name = string("linear_23_cast_fp16")];
|
| 182 |
+
string input_53_mode_0 = const()[name = string("input_53_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 183 |
+
tensor<fp16, [1, 256, 2048]> input_53_cast_fp16 = gelu(mode = input_53_mode_0, x = linear_23_cast_fp16)[name = string("input_53_cast_fp16")];
|
| 184 |
+
tensor<fp16, [1, 256, 768]> linear_24_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_53_cast_fp16)[name = string("linear_24_cast_fp16")];
|
| 185 |
+
tensor<fp16, [1, 256, 768]> input_55_cast_fp16 = add(x = linear_24_cast_fp16, y = input_49_cast_fp16)[name = string("input_55_cast_fp16")];
|
| 186 |
+
tensor<int32, [1]> hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 187 |
+
tensor<fp16, [1, 256, 768]> hidden_states_9_cast_fp16 = layer_norm(axes = hidden_states_9_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_55_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
|
| 188 |
+
tensor<fp16, [1, 256, 768]> linear_25_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_25_cast_fp16")];
|
| 189 |
+
tensor<int32, [4]> var_443 = const()[name = string("op_443"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 190 |
+
tensor<fp16, [1, 256, 12, 64]> x_51_cast_fp16 = reshape(shape = var_443, x = linear_25_cast_fp16)[name = string("x_51_cast_fp16")];
|
| 191 |
+
tensor<fp16, [1, 256, 768]> linear_26_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_26_cast_fp16")];
|
| 192 |
+
tensor<int32, [4]> var_452 = const()[name = string("op_452"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 193 |
+
tensor<fp16, [1, 256, 12, 64]> x_55_cast_fp16 = reshape(shape = var_452, x = linear_26_cast_fp16)[name = string("x_55_cast_fp16")];
|
| 194 |
+
tensor<fp16, [1, 256, 768]> linear_27_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_27_cast_fp16")];
|
| 195 |
+
tensor<int32, [4]> var_461 = const()[name = string("op_461"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 196 |
+
tensor<fp16, [1, 256, 12, 64]> x_59_cast_fp16 = reshape(shape = var_461, x = linear_27_cast_fp16)[name = string("x_59_cast_fp16")];
|
| 197 |
+
tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 198 |
+
tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 199 |
+
tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 200 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = x_59_cast_fp16)[name = string("transpose_138")];
|
| 201 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_85 = transpose(perm = transpose_85_perm_0, x = x_55_cast_fp16)[name = string("transpose_139")];
|
| 202 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = x_51_cast_fp16)[name = string("transpose_140")];
|
| 203 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_85, query = transpose_84, value = transpose_86)[name = string("attention_output_17_cast_fp16")];
|
| 204 |
+
tensor<int32, [4]> attention_output_19_perm_0 = const()[name = string("attention_output_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 205 |
+
tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 206 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_19_cast_fp16 = transpose(perm = attention_output_19_perm_0, x = attention_output_17_cast_fp16)[name = string("transpose_137")];
|
| 207 |
+
tensor<fp16, [1, 256, 768]> input_57_cast_fp16 = reshape(shape = var_467, x = attention_output_19_cast_fp16)[name = string("input_57_cast_fp16")];
|
| 208 |
+
tensor<fp16, [1, 256, 768]> linear_28_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_28_cast_fp16")];
|
| 209 |
+
tensor<fp16, [1, 256, 768]> input_59_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_28_cast_fp16)[name = string("input_59_cast_fp16")];
|
| 210 |
+
tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 211 |
+
tensor<fp16, [1, 256, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")];
|
| 212 |
+
tensor<fp16, [1, 256, 2048]> linear_29_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_61_cast_fp16)[name = string("linear_29_cast_fp16")];
|
| 213 |
+
string input_65_mode_0 = const()[name = string("input_65_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 214 |
+
tensor<fp16, [1, 256, 2048]> input_65_cast_fp16 = gelu(mode = input_65_mode_0, x = linear_29_cast_fp16)[name = string("input_65_cast_fp16")];
|
| 215 |
+
tensor<fp16, [1, 256, 768]> linear_30_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_65_cast_fp16)[name = string("linear_30_cast_fp16")];
|
| 216 |
+
tensor<fp16, [1, 256, 768]> input_67_cast_fp16 = add(x = linear_30_cast_fp16, y = input_61_cast_fp16)[name = string("input_67_cast_fp16")];
|
| 217 |
+
tensor<int32, [1]> hidden_states_11_axes_0 = const()[name = string("hidden_states_11_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 218 |
+
tensor<fp16, [1, 256, 768]> hidden_states_11_cast_fp16 = layer_norm(axes = hidden_states_11_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
|
| 219 |
+
tensor<fp16, [1, 256, 768]> linear_31_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_31_cast_fp16")];
|
| 220 |
+
tensor<int32, [4]> var_518 = const()[name = string("op_518"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 221 |
+
tensor<fp16, [1, 256, 12, 64]> x_63_cast_fp16 = reshape(shape = var_518, x = linear_31_cast_fp16)[name = string("x_63_cast_fp16")];
|
| 222 |
+
tensor<fp16, [1, 256, 768]> linear_32_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_32_cast_fp16")];
|
| 223 |
+
tensor<int32, [4]> var_527 = const()[name = string("op_527"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 224 |
+
tensor<fp16, [1, 256, 12, 64]> x_67_cast_fp16 = reshape(shape = var_527, x = linear_32_cast_fp16)[name = string("x_67_cast_fp16")];
|
| 225 |
+
tensor<fp16, [1, 256, 768]> linear_33_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_33_cast_fp16")];
|
| 226 |
+
tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 227 |
+
tensor<fp16, [1, 256, 12, 64]> x_71_cast_fp16 = reshape(shape = var_536, x = linear_33_cast_fp16)[name = string("x_71_cast_fp16")];
|
| 228 |
+
tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 229 |
+
tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 230 |
+
tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 231 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_89 = transpose(perm = transpose_89_perm_0, x = x_71_cast_fp16)[name = string("transpose_134")];
|
| 232 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = x_67_cast_fp16)[name = string("transpose_135")];
|
| 233 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_87 = transpose(perm = transpose_87_perm_0, x = x_63_cast_fp16)[name = string("transpose_136")];
|
| 234 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_88, query = transpose_87, value = transpose_89)[name = string("attention_output_21_cast_fp16")];
|
| 235 |
+
tensor<int32, [4]> attention_output_23_perm_0 = const()[name = string("attention_output_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 236 |
+
tensor<int32, [3]> var_542 = const()[name = string("op_542"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 237 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_23_cast_fp16 = transpose(perm = attention_output_23_perm_0, x = attention_output_21_cast_fp16)[name = string("transpose_133")];
|
| 238 |
+
tensor<fp16, [1, 256, 768]> input_69_cast_fp16 = reshape(shape = var_542, x = attention_output_23_cast_fp16)[name = string("input_69_cast_fp16")];
|
| 239 |
+
tensor<fp16, [1, 256, 768]> linear_34_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_34_cast_fp16")];
|
| 240 |
+
tensor<fp16, [1, 256, 768]> input_71_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = linear_34_cast_fp16)[name = string("input_71_cast_fp16")];
|
| 241 |
+
tensor<int32, [1]> input_73_axes_0 = const()[name = string("input_73_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 242 |
+
tensor<fp16, [1, 256, 768]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("input_73_cast_fp16")];
|
| 243 |
+
tensor<fp16, [1, 256, 2048]> linear_35_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_73_cast_fp16)[name = string("linear_35_cast_fp16")];
|
| 244 |
+
string input_77_mode_0 = const()[name = string("input_77_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 245 |
+
tensor<fp16, [1, 256, 2048]> input_77_cast_fp16 = gelu(mode = input_77_mode_0, x = linear_35_cast_fp16)[name = string("input_77_cast_fp16")];
|
| 246 |
+
tensor<fp16, [1, 256, 768]> linear_36_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_77_cast_fp16)[name = string("linear_36_cast_fp16")];
|
| 247 |
+
tensor<fp16, [1, 256, 768]> input_79_cast_fp16 = add(x = linear_36_cast_fp16, y = input_73_cast_fp16)[name = string("input_79_cast_fp16")];
|
| 248 |
+
tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 249 |
+
tensor<fp16, [1, 256, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_79_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
|
| 250 |
+
tensor<fp16, [1, 256, 768]> linear_37_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_37_cast_fp16")];
|
| 251 |
+
tensor<int32, [4]> var_593 = const()[name = string("op_593"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 252 |
+
tensor<fp16, [1, 256, 12, 64]> x_75_cast_fp16 = reshape(shape = var_593, x = linear_37_cast_fp16)[name = string("x_75_cast_fp16")];
|
| 253 |
+
tensor<fp16, [1, 256, 768]> linear_38_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_38_cast_fp16")];
|
| 254 |
+
tensor<int32, [4]> var_602 = const()[name = string("op_602"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 255 |
+
tensor<fp16, [1, 256, 12, 64]> x_79_cast_fp16 = reshape(shape = var_602, x = linear_38_cast_fp16)[name = string("x_79_cast_fp16")];
|
| 256 |
+
tensor<fp16, [1, 256, 768]> linear_39_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_39_cast_fp16")];
|
| 257 |
+
tensor<int32, [4]> var_611 = const()[name = string("op_611"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 258 |
+
tensor<fp16, [1, 256, 12, 64]> x_83_cast_fp16 = reshape(shape = var_611, x = linear_39_cast_fp16)[name = string("x_83_cast_fp16")];
|
| 259 |
+
tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 260 |
+
tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 261 |
+
tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 262 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = x_83_cast_fp16)[name = string("transpose_130")];
|
| 263 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_91 = transpose(perm = transpose_91_perm_0, x = x_79_cast_fp16)[name = string("transpose_131")];
|
| 264 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = x_75_cast_fp16)[name = string("transpose_132")];
|
| 265 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_91, query = transpose_90, value = transpose_92)[name = string("attention_output_25_cast_fp16")];
|
| 266 |
+
tensor<int32, [4]> attention_output_27_perm_0 = const()[name = string("attention_output_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 267 |
+
tensor<int32, [3]> var_617 = const()[name = string("op_617"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 268 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_27_cast_fp16 = transpose(perm = attention_output_27_perm_0, x = attention_output_25_cast_fp16)[name = string("transpose_129")];
|
| 269 |
+
tensor<fp16, [1, 256, 768]> input_81_cast_fp16 = reshape(shape = var_617, x = attention_output_27_cast_fp16)[name = string("input_81_cast_fp16")];
|
| 270 |
+
tensor<fp16, [1, 256, 768]> linear_40_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_40_cast_fp16")];
|
| 271 |
+
tensor<fp16, [1, 256, 768]> input_83_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = linear_40_cast_fp16)[name = string("input_83_cast_fp16")];
|
| 272 |
+
tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 273 |
+
tensor<fp16, [1, 256, 768]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("input_85_cast_fp16")];
|
| 274 |
+
tensor<fp16, [1, 256, 2048]> linear_41_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_85_cast_fp16)[name = string("linear_41_cast_fp16")];
|
| 275 |
+
string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 276 |
+
tensor<fp16, [1, 256, 2048]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = linear_41_cast_fp16)[name = string("input_89_cast_fp16")];
|
| 277 |
+
tensor<fp16, [1, 256, 768]> linear_42_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_89_cast_fp16)[name = string("linear_42_cast_fp16")];
|
| 278 |
+
tensor<fp16, [1, 256, 768]> input_91_cast_fp16 = add(x = linear_42_cast_fp16, y = input_85_cast_fp16)[name = string("input_91_cast_fp16")];
|
| 279 |
+
tensor<int32, [1]> hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 280 |
+
tensor<fp16, [1, 256, 768]> hidden_states_15_cast_fp16 = layer_norm(axes = hidden_states_15_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_91_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
|
| 281 |
+
tensor<fp16, [1, 256, 768]> linear_43_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_43_cast_fp16")];
|
| 282 |
+
tensor<int32, [4]> var_668 = const()[name = string("op_668"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 283 |
+
tensor<fp16, [1, 256, 12, 64]> x_87_cast_fp16 = reshape(shape = var_668, x = linear_43_cast_fp16)[name = string("x_87_cast_fp16")];
|
| 284 |
+
tensor<fp16, [1, 256, 768]> linear_44_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_44_cast_fp16")];
|
| 285 |
+
tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 286 |
+
tensor<fp16, [1, 256, 12, 64]> x_91_cast_fp16 = reshape(shape = var_677, x = linear_44_cast_fp16)[name = string("x_91_cast_fp16")];
|
| 287 |
+
tensor<fp16, [1, 256, 768]> linear_45_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_45_cast_fp16")];
|
| 288 |
+
tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 289 |
+
tensor<fp16, [1, 256, 12, 64]> x_95_cast_fp16 = reshape(shape = var_686, x = linear_45_cast_fp16)[name = string("x_95_cast_fp16")];
|
| 290 |
+
tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 291 |
+
tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 292 |
+
tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 293 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_95 = transpose(perm = transpose_95_perm_0, x = x_95_cast_fp16)[name = string("transpose_126")];
|
| 294 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = x_91_cast_fp16)[name = string("transpose_127")];
|
| 295 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_93 = transpose(perm = transpose_93_perm_0, x = x_87_cast_fp16)[name = string("transpose_128")];
|
| 296 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_94, query = transpose_93, value = transpose_95)[name = string("attention_output_29_cast_fp16")];
|
| 297 |
+
tensor<int32, [4]> attention_output_31_perm_0 = const()[name = string("attention_output_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 298 |
+
tensor<int32, [3]> var_692 = const()[name = string("op_692"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 299 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_31_cast_fp16 = transpose(perm = attention_output_31_perm_0, x = attention_output_29_cast_fp16)[name = string("transpose_125")];
|
| 300 |
+
tensor<fp16, [1, 256, 768]> input_93_cast_fp16 = reshape(shape = var_692, x = attention_output_31_cast_fp16)[name = string("input_93_cast_fp16")];
|
| 301 |
+
tensor<fp16, [1, 256, 768]> linear_46_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_46_cast_fp16")];
|
| 302 |
+
tensor<fp16, [1, 256, 768]> input_95_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = linear_46_cast_fp16)[name = string("input_95_cast_fp16")];
|
| 303 |
+
tensor<int32, [1]> input_97_axes_0 = const()[name = string("input_97_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 304 |
+
tensor<fp16, [1, 256, 768]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_95_cast_fp16)[name = string("input_97_cast_fp16")];
|
| 305 |
+
tensor<fp16, [1, 256, 2048]> linear_47_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_97_cast_fp16)[name = string("linear_47_cast_fp16")];
|
| 306 |
+
string input_101_mode_0 = const()[name = string("input_101_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 307 |
+
tensor<fp16, [1, 256, 2048]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = linear_47_cast_fp16)[name = string("input_101_cast_fp16")];
|
| 308 |
+
tensor<fp16, [1, 256, 768]> linear_48_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_101_cast_fp16)[name = string("linear_48_cast_fp16")];
|
| 309 |
+
tensor<fp16, [1, 256, 768]> input_103_cast_fp16 = add(x = linear_48_cast_fp16, y = input_97_cast_fp16)[name = string("input_103_cast_fp16")];
|
| 310 |
+
tensor<int32, [1]> hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 311 |
+
tensor<fp16, [1, 256, 768]> hidden_states_17_cast_fp16 = layer_norm(axes = hidden_states_17_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
|
| 312 |
+
tensor<fp16, [1, 256, 768]> linear_49_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_49_cast_fp16")];
|
| 313 |
+
tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 314 |
+
tensor<fp16, [1, 256, 12, 64]> x_99_cast_fp16 = reshape(shape = var_743, x = linear_49_cast_fp16)[name = string("x_99_cast_fp16")];
|
| 315 |
+
tensor<fp16, [1, 256, 768]> linear_50_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_50_cast_fp16")];
|
| 316 |
+
tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 317 |
+
tensor<fp16, [1, 256, 12, 64]> x_103_cast_fp16 = reshape(shape = var_752, x = linear_50_cast_fp16)[name = string("x_103_cast_fp16")];
|
| 318 |
+
tensor<fp16, [1, 256, 768]> linear_51_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_51_cast_fp16")];
|
| 319 |
+
tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 320 |
+
tensor<fp16, [1, 256, 12, 64]> x_107_cast_fp16 = reshape(shape = var_761, x = linear_51_cast_fp16)[name = string("x_107_cast_fp16")];
|
| 321 |
+
tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 322 |
+
tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 323 |
+
tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 324 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_98 = transpose(perm = transpose_98_perm_0, x = x_107_cast_fp16)[name = string("transpose_122")];
|
| 325 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_97 = transpose(perm = transpose_97_perm_0, x = x_103_cast_fp16)[name = string("transpose_123")];
|
| 326 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = x_99_cast_fp16)[name = string("transpose_124")];
|
| 327 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_97, query = transpose_96, value = transpose_98)[name = string("attention_output_33_cast_fp16")];
|
| 328 |
+
tensor<int32, [4]> attention_output_35_perm_0 = const()[name = string("attention_output_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 329 |
+
tensor<int32, [3]> var_767 = const()[name = string("op_767"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 330 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_35_cast_fp16 = transpose(perm = attention_output_35_perm_0, x = attention_output_33_cast_fp16)[name = string("transpose_121")];
|
| 331 |
+
tensor<fp16, [1, 256, 768]> input_105_cast_fp16 = reshape(shape = var_767, x = attention_output_35_cast_fp16)[name = string("input_105_cast_fp16")];
|
| 332 |
+
tensor<fp16, [1, 256, 768]> linear_52_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_52_cast_fp16")];
|
| 333 |
+
tensor<fp16, [1, 256, 768]> input_107_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_52_cast_fp16)[name = string("input_107_cast_fp16")];
|
| 334 |
+
tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 335 |
+
tensor<fp16, [1, 256, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
|
| 336 |
+
tensor<fp16, [1, 256, 2048]> linear_53_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_109_cast_fp16)[name = string("linear_53_cast_fp16")];
|
| 337 |
+
string input_113_mode_0 = const()[name = string("input_113_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 338 |
+
tensor<fp16, [1, 256, 2048]> input_113_cast_fp16 = gelu(mode = input_113_mode_0, x = linear_53_cast_fp16)[name = string("input_113_cast_fp16")];
|
| 339 |
+
tensor<fp16, [1, 256, 768]> linear_54_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_113_cast_fp16)[name = string("linear_54_cast_fp16")];
|
| 340 |
+
tensor<fp16, [1, 256, 768]> input_115_cast_fp16 = add(x = linear_54_cast_fp16, y = input_109_cast_fp16)[name = string("input_115_cast_fp16")];
|
| 341 |
+
tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 342 |
+
tensor<fp16, [1, 256, 768]> hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_115_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
|
| 343 |
+
tensor<fp16, [1, 256, 768]> linear_55_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_55_cast_fp16")];
|
| 344 |
+
tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 345 |
+
tensor<fp16, [1, 256, 12, 64]> x_111_cast_fp16 = reshape(shape = var_818, x = linear_55_cast_fp16)[name = string("x_111_cast_fp16")];
|
| 346 |
+
tensor<fp16, [1, 256, 768]> linear_56_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_56_cast_fp16")];
|
| 347 |
+
tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 348 |
+
tensor<fp16, [1, 256, 12, 64]> x_115_cast_fp16 = reshape(shape = var_827, x = linear_56_cast_fp16)[name = string("x_115_cast_fp16")];
|
| 349 |
+
tensor<fp16, [1, 256, 768]> linear_57_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_57_cast_fp16")];
|
| 350 |
+
tensor<int32, [4]> var_836 = const()[name = string("op_836"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 351 |
+
tensor<fp16, [1, 256, 12, 64]> x_119_cast_fp16 = reshape(shape = var_836, x = linear_57_cast_fp16)[name = string("x_119_cast_fp16")];
|
| 352 |
+
tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 353 |
+
tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 354 |
+
tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 355 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_101 = transpose(perm = transpose_101_perm_0, x = x_119_cast_fp16)[name = string("transpose_118")];
|
| 356 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = x_115_cast_fp16)[name = string("transpose_119")];
|
| 357 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_99 = transpose(perm = transpose_99_perm_0, x = x_111_cast_fp16)[name = string("transpose_120")];
|
| 358 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_100, query = transpose_99, value = transpose_101)[name = string("attention_output_37_cast_fp16")];
|
| 359 |
+
tensor<int32, [4]> attention_output_39_perm_0 = const()[name = string("attention_output_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 360 |
+
tensor<int32, [3]> var_842 = const()[name = string("op_842"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 361 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_39_cast_fp16 = transpose(perm = attention_output_39_perm_0, x = attention_output_37_cast_fp16)[name = string("transpose_117")];
|
| 362 |
+
tensor<fp16, [1, 256, 768]> input_117_cast_fp16 = reshape(shape = var_842, x = attention_output_39_cast_fp16)[name = string("input_117_cast_fp16")];
|
| 363 |
+
tensor<fp16, [1, 256, 768]> linear_58_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_58_cast_fp16")];
|
| 364 |
+
tensor<fp16, [1, 256, 768]> input_119_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_58_cast_fp16)[name = string("input_119_cast_fp16")];
|
| 365 |
+
tensor<int32, [1]> input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 366 |
+
tensor<fp16, [1, 256, 768]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("input_121_cast_fp16")];
|
| 367 |
+
tensor<fp16, [1, 256, 2048]> linear_59_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_121_cast_fp16)[name = string("linear_59_cast_fp16")];
|
| 368 |
+
string input_125_mode_0 = const()[name = string("input_125_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 369 |
+
tensor<fp16, [1, 256, 2048]> input_125_cast_fp16 = gelu(mode = input_125_mode_0, x = linear_59_cast_fp16)[name = string("input_125_cast_fp16")];
|
| 370 |
+
tensor<fp16, [1, 256, 768]> linear_60_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_125_cast_fp16)[name = string("linear_60_cast_fp16")];
|
| 371 |
+
tensor<fp16, [1, 256, 768]> input_127_cast_fp16 = add(x = linear_60_cast_fp16, y = input_121_cast_fp16)[name = string("input_127_cast_fp16")];
|
| 372 |
+
tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 373 |
+
tensor<fp16, [1, 256, 768]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_127_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
|
| 374 |
+
tensor<fp16, [1, 256, 768]> linear_61_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_61_cast_fp16")];
|
| 375 |
+
tensor<int32, [4]> var_893 = const()[name = string("op_893"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 376 |
+
tensor<fp16, [1, 256, 12, 64]> x_123_cast_fp16 = reshape(shape = var_893, x = linear_61_cast_fp16)[name = string("x_123_cast_fp16")];
|
| 377 |
+
tensor<fp16, [1, 256, 768]> linear_62_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_62_cast_fp16")];
|
| 378 |
+
tensor<int32, [4]> var_902 = const()[name = string("op_902"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 379 |
+
tensor<fp16, [1, 256, 12, 64]> x_127_cast_fp16 = reshape(shape = var_902, x = linear_62_cast_fp16)[name = string("x_127_cast_fp16")];
|
| 380 |
+
tensor<fp16, [1, 256, 768]> linear_63_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_63_cast_fp16")];
|
| 381 |
+
tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 382 |
+
tensor<fp16, [1, 256, 12, 64]> x_131_cast_fp16 = reshape(shape = var_911, x = linear_63_cast_fp16)[name = string("x_131_cast_fp16")];
|
| 383 |
+
tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 384 |
+
tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 385 |
+
tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 386 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = x_131_cast_fp16)[name = string("transpose_114")];
|
| 387 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_103 = transpose(perm = transpose_103_perm_0, x = x_127_cast_fp16)[name = string("transpose_115")];
|
| 388 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_102 = transpose(perm = transpose_102_perm_0, x = x_123_cast_fp16)[name = string("transpose_116")];
|
| 389 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_103, query = transpose_102, value = transpose_104)[name = string("attention_output_41_cast_fp16")];
|
| 390 |
+
tensor<int32, [4]> attention_output_43_perm_0 = const()[name = string("attention_output_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 391 |
+
tensor<int32, [3]> var_917 = const()[name = string("op_917"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 392 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_43_cast_fp16 = transpose(perm = attention_output_43_perm_0, x = attention_output_41_cast_fp16)[name = string("transpose_113")];
|
| 393 |
+
tensor<fp16, [1, 256, 768]> input_129_cast_fp16 = reshape(shape = var_917, x = attention_output_43_cast_fp16)[name = string("input_129_cast_fp16")];
|
| 394 |
+
tensor<fp16, [1, 256, 768]> linear_64_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_64_cast_fp16")];
|
| 395 |
+
tensor<fp16, [1, 256, 768]> input_131_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = linear_64_cast_fp16)[name = string("input_131_cast_fp16")];
|
| 396 |
+
tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 397 |
+
tensor<fp16, [1, 256, 768]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_131_cast_fp16)[name = string("input_133_cast_fp16")];
|
| 398 |
+
tensor<fp16, [1, 256, 2048]> linear_65_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_133_cast_fp16)[name = string("linear_65_cast_fp16")];
|
| 399 |
+
string input_137_mode_0 = const()[name = string("input_137_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 400 |
+
tensor<fp16, [1, 256, 2048]> input_137_cast_fp16 = gelu(mode = input_137_mode_0, x = linear_65_cast_fp16)[name = string("input_137_cast_fp16")];
|
| 401 |
+
tensor<fp16, [1, 256, 768]> linear_66_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_137_cast_fp16)[name = string("linear_66_cast_fp16")];
|
| 402 |
+
tensor<fp16, [1, 256, 768]> input_139_cast_fp16 = add(x = linear_66_cast_fp16, y = input_133_cast_fp16)[name = string("input_139_cast_fp16")];
|
| 403 |
+
tensor<int32, [1]> hidden_states_axes_0 = const()[name = string("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 404 |
+
tensor<fp16, [1, 256, 768]> hidden_states_cast_fp16 = layer_norm(axes = hidden_states_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_139_cast_fp16)[name = string("hidden_states_cast_fp16")];
|
| 405 |
+
tensor<fp16, [1, 256, 768]> linear_67_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_67_cast_fp16")];
|
| 406 |
+
tensor<int32, [4]> var_968 = const()[name = string("op_968"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 407 |
+
tensor<fp16, [1, 256, 12, 64]> x_135_cast_fp16 = reshape(shape = var_968, x = linear_67_cast_fp16)[name = string("x_135_cast_fp16")];
|
| 408 |
+
tensor<fp16, [1, 256, 768]> linear_68_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_68_cast_fp16")];
|
| 409 |
+
tensor<int32, [4]> var_977 = const()[name = string("op_977"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 410 |
+
tensor<fp16, [1, 256, 12, 64]> x_139_cast_fp16 = reshape(shape = var_977, x = linear_68_cast_fp16)[name = string("x_139_cast_fp16")];
|
| 411 |
+
tensor<fp16, [1, 256, 768]> linear_69_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_69_cast_fp16")];
|
| 412 |
+
tensor<int32, [4]> var_986 = const()[name = string("op_986"), val = tensor<int32, [4]>([1, 256, 12, 64])];
|
| 413 |
+
tensor<fp16, [1, 256, 12, 64]> x_cast_fp16 = reshape(shape = var_986, x = linear_69_cast_fp16)[name = string("x_cast_fp16")];
|
| 414 |
+
tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 415 |
+
tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 416 |
+
tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 417 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_107 = transpose(perm = transpose_107_perm_0, x = x_cast_fp16)[name = string("transpose_110")];
|
| 418 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_106 = transpose(perm = transpose_106_perm_0, x = x_139_cast_fp16)[name = string("transpose_111")];
|
| 419 |
+
tensor<fp16, [1, 12, 256, 64]> transpose_105 = transpose(perm = transpose_105_perm_0, x = x_135_cast_fp16)[name = string("transpose_112")];
|
| 420 |
+
tensor<fp16, [1, 12, 256, 64]> attention_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_106, query = transpose_105, value = transpose_107)[name = string("attention_output_45_cast_fp16")];
|
| 421 |
+
tensor<int32, [4]> attention_output_perm_0 = const()[name = string("attention_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 422 |
+
tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([1, 256, 768])];
|
| 423 |
+
tensor<fp16, [1, 256, 12, 64]> attention_output_cast_fp16 = transpose(perm = attention_output_perm_0, x = attention_output_45_cast_fp16)[name = string("transpose_109")];
|
| 424 |
+
tensor<fp16, [1, 256, 768]> input_141_cast_fp16 = reshape(shape = var_992, x = attention_output_cast_fp16)[name = string("input_141_cast_fp16")];
|
| 425 |
+
tensor<fp16, [1, 256, 768]> linear_70_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_70_cast_fp16")];
|
| 426 |
+
tensor<fp16, [1, 256, 768]> input_143_cast_fp16 = add(x = hidden_states_cast_fp16, y = linear_70_cast_fp16)[name = string("input_143_cast_fp16")];
|
| 427 |
+
tensor<int32, [1]> input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 428 |
+
tensor<fp16, [1, 256, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")];
|
| 429 |
+
tensor<fp16, [1, 256, 2048]> linear_71_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_145_cast_fp16)[name = string("linear_71_cast_fp16")];
|
| 430 |
+
string input_149_mode_0 = const()[name = string("input_149_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 431 |
+
tensor<fp16, [1, 256, 2048]> input_149_cast_fp16 = gelu(mode = input_149_mode_0, x = linear_71_cast_fp16)[name = string("input_149_cast_fp16")];
|
| 432 |
+
tensor<fp16, [1, 256, 768]> linear_72_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_149_cast_fp16)[name = string("linear_72_cast_fp16")];
|
| 433 |
+
tensor<fp16, [1, 256, 768]> input_151_cast_fp16 = add(x = linear_72_cast_fp16, y = input_145_cast_fp16)[name = string("input_151_cast_fp16")];
|
| 434 |
+
tensor<int32, [1]> sequence_output_axes_0 = const()[name = string("sequence_output_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 435 |
+
tensor<fp16, [1, 256, 768]> sequence_output = layer_norm(axes = sequence_output_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_151_cast_fp16)[name = string("sequence_output_cast_fp16")];
|
| 436 |
+
tensor<fp16, [512, 768]> bert_encoder_weight_to_fp16 = const()[name = string("bert_encoder_weight_to_fp16"), val = tensor<fp16, [512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11404800)))];
|
| 437 |
+
tensor<fp16, [512]> bert_encoder_bias_to_fp16 = const()[name = string("bert_encoder_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12191296)))];
|
| 438 |
+
tensor<fp16, [1, 256, 512]> linear_73_cast_fp16 = linear(bias = bert_encoder_bias_to_fp16, weight = bert_encoder_weight_to_fp16, x = sequence_output)[name = string("linear_73_cast_fp16")];
|
| 439 |
+
tensor<int32, [3]> var_1030_perm_0 = const()[name = string("op_1030_perm_0"), val = tensor<int32, [3]>([0, -1, -2])];
|
| 440 |
+
tensor<fp16, [1, 512, 256]> var_1030 = transpose(perm = var_1030_perm_0, x = linear_73_cast_fp16)[name = string("transpose_108")];
|
| 441 |
+
} -> (sequence_output, var_1030);
|
| 442 |
+
}
|
iteration_3/compiled/bert_fp16_t256.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7569b53a68b9664e246fda171851daa1dd5f01f64aa31533dfcaf40f4034fee3
|
| 3 |
+
size 12192384
|
iteration_3/compiled/bert_fp16_t64.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fff70f6d02af452c0cfe9133a60c0ddaea7da51bca82b19885aaf90d49533955
|
| 3 |
+
size 243
|
iteration_3/compiled/bert_fp16_t64.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ab9e12f625df7bfa9daa01c7c3a319d095408cf032b482e2f6733748de5e850
|
| 3 |
+
size 437
|
iteration_3/compiled/bert_fp16_t64.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 64 × 768)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 64, 768]",
|
| 13 |
+
"name" : "sequence_output",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Float16",
|
| 20 |
+
"formattedType" : "MultiArray (Float16 1 × 512 × 64)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1, 512, 64]",
|
| 23 |
+
"name" : "var_1030",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
}
|
| 26 |
+
],
|
| 27 |
+
"modelParameters" : [
|
| 28 |
+
|
| 29 |
+
],
|
| 30 |
+
"specificationVersion" : 9,
|
| 31 |
+
"mlProgramOperationTypeHistogram" : {
|
| 32 |
+
"Ios18.linear" : 74,
|
| 33 |
+
"Ios18.scaledDotProductAttention" : 12,
|
| 34 |
+
"Ios18.sub" : 1,
|
| 35 |
+
"Select" : 2,
|
| 36 |
+
"Ios18.expandDims" : 2,
|
| 37 |
+
"Ios18.gelu" : 12,
|
| 38 |
+
"Ios18.gather" : 1,
|
| 39 |
+
"Ios18.add" : 27,
|
| 40 |
+
"Tile" : 1,
|
| 41 |
+
"Ios18.layerNorm" : 25,
|
| 42 |
+
"Ios18.transpose" : 49,
|
| 43 |
+
"Ios18.cast" : 5,
|
| 44 |
+
"Ios18.reshape" : 48,
|
| 45 |
+
"Ios18.greaterEqual" : 1
|
| 46 |
+
},
|
| 47 |
+
"computePrecision" : "Mixed (Float16, Int16, Int32)",
|
| 48 |
+
"isUpdatable" : "0",
|
| 49 |
+
"stateSchema" : [
|
| 50 |
+
|
| 51 |
+
],
|
| 52 |
+
"availability" : {
|
| 53 |
+
"macOS" : "15.0",
|
| 54 |
+
"tvOS" : "18.0",
|
| 55 |
+
"visionOS" : "2.0",
|
| 56 |
+
"watchOS" : "11.0",
|
| 57 |
+
"iOS" : "18.0",
|
| 58 |
+
"macCatalyst" : "18.0"
|
| 59 |
+
},
|
| 60 |
+
"modelType" : {
|
| 61 |
+
"name" : "MLModelType_mlProgram"
|
| 62 |
+
},
|
| 63 |
+
"userDefinedMetadata" : {
|
| 64 |
+
"com.github.apple.coremltools.conversion_date" : "2026-05-08",
|
| 65 |
+
"com.github.apple.coremltools.source" : "torch==2.11.0",
|
| 66 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 67 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 68 |
+
},
|
| 69 |
+
"inputSchema" : [
|
| 70 |
+
{
|
| 71 |
+
"hasShapeFlexibility" : "0",
|
| 72 |
+
"isOptional" : "0",
|
| 73 |
+
"dataType" : "Int32",
|
| 74 |
+
"formattedType" : "MultiArray (Int32 1 × 64)",
|
| 75 |
+
"shortDescription" : "",
|
| 76 |
+
"shape" : "[1, 64]",
|
| 77 |
+
"name" : "tokens",
|
| 78 |
+
"type" : "MultiArray"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"hasShapeFlexibility" : "0",
|
| 82 |
+
"isOptional" : "0",
|
| 83 |
+
"dataType" : "Int32",
|
| 84 |
+
"formattedType" : "MultiArray (Int32 1 × 64)",
|
| 85 |
+
"shortDescription" : "",
|
| 86 |
+
"shape" : "[1, 64]",
|
| 87 |
+
"name" : "attention_mask",
|
| 88 |
+
"type" : "MultiArray"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"generatedClassName" : "bert_fp16_t64",
|
| 92 |
+
"method" : "predict"
|
| 93 |
+
}
|
| 94 |
+
]
|
iteration_3/compiled/bert_fp16_t64.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.3)
|
| 2 |
+
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios18>(tensor<int32, [1, 64]> attention_mask, tensor<int32, [1, 64]> tokens) {
|
| 5 |
+
int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
|
| 6 |
+
bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
|
| 7 |
+
tensor<fp16, [178, 128]> bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = string("bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [178, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
|
| 8 |
+
string tokens_to_int16_dtype_0 = const()[name = string("tokens_to_int16_dtype_0"), val = string("int16")];
|
| 9 |
+
string cast_53_dtype_0 = const()[name = string("cast_53_dtype_0"), val = string("int32")];
|
| 10 |
+
int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
|
| 11 |
+
tensor<int16, [1, 64]> tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = string("cast_58")];
|
| 12 |
+
tensor<int32, [1, 64]> cast_53 = cast(dtype = cast_53_dtype_0, x = tokens_to_int16)[name = string("cast_57")];
|
| 13 |
+
tensor<bool, [1, 64]> greater_equal_0 = greater_equal(x = cast_53, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
|
| 14 |
+
int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(178)];
|
| 15 |
+
tensor<int32, [1, 64]> add_0 = add(x = cast_53, y = slice_by_index_0)[name = string("add_0")];
|
| 16 |
+
tensor<int32, [1, 64]> select_0 = select(a = cast_53, b = add_0, cond = greater_equal_0)[name = string("select_0")];
|
| 17 |
+
int32 inputs_embeds_cast_fp16_cast_uint16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
|
| 18 |
+
string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
|
| 19 |
+
tensor<int16, [1, 64]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_56")];
|
| 20 |
+
tensor<fp16, [1, 64, 128]> inputs_embeds_cast_fp16_cast_uint16_cast_uint16 = gather(axis = inputs_embeds_cast_fp16_cast_uint16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_to_int16, validate_indices = inputs_embeds_validate_indices_0, x = bert_embeddings_word_embeddings_weight_to_fp16)[name = string("inputs_embeds_cast_fp16_cast_uint16_cast_uint16")];
|
| 21 |
+
tensor<fp16, [1, 64, 128]> token_type_embeddings_1_to_fp16 = const()[name = string("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 64, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45696)))];
|
| 22 |
+
tensor<fp16, [1, 64, 128]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16_cast_uint16_cast_uint16, y = token_type_embeddings_1_to_fp16)[name = string("embeddings_1_cast_fp16")];
|
| 23 |
+
tensor<fp16, [1, 64, 128]> position_embeddings_1_to_fp16 = const()[name = string("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 64, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62144)))];
|
| 24 |
+
tensor<fp16, [1, 64, 128]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = string("input_5_cast_fp16")];
|
| 25 |
+
tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 26 |
+
tensor<fp16, [128]> bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78592)))];
|
| 27 |
+
tensor<fp16, [128]> bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78912)))];
|
| 28 |
+
fp16 var_34_to_fp16 = const()[name = string("op_34_to_fp16"), val = fp16(0x1p-24)];
|
| 29 |
+
tensor<fp16, [1, 64, 128]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_34_to_fp16, gamma = bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
|
| 30 |
+
tensor<int32, [1]> var_79_axes_0 = const()[name = string("op_79_axes_0"), val = tensor<int32, [1]>([1])];
|
| 31 |
+
tensor<int32, [1, 1, 64]> var_79 = expand_dims(axes = var_79_axes_0, x = attention_mask)[name = string("op_79")];
|
| 32 |
+
tensor<int32, [1]> var_81_axes_0 = const()[name = string("op_81_axes_0"), val = tensor<int32, [1]>([2])];
|
| 33 |
+
tensor<int32, [1, 1, 1, 64]> var_81 = expand_dims(axes = var_81_axes_0, x = var_79)[name = string("op_81")];
|
| 34 |
+
tensor<int32, [4]> var_90_reps_0 = const()[name = string("op_90_reps_0"), val = tensor<int32, [4]>([1, 1, 64, 1])];
|
| 35 |
+
tensor<int32, [1, 1, 64, 64]> var_90 = tile(reps = var_90_reps_0, x = var_81)[name = string("op_90")];
|
| 36 |
+
fp16 var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = fp16(0x1p+0)];
|
| 37 |
+
string var_95_to_fp16_dtype_0 = const()[name = string("op_95_to_fp16_dtype_0"), val = string("fp16")];
|
| 38 |
+
tensor<fp16, [1, 1, 64, 64]> var_90_to_fp16 = cast(dtype = var_95_to_fp16_dtype_0, x = var_90)[name = string("cast_55")];
|
| 39 |
+
tensor<fp16, [1, 1, 64, 64]> inverted_mask_cast_fp16 = sub(x = var_96_to_fp16, y = var_90_to_fp16)[name = string("inverted_mask_cast_fp16")];
|
| 40 |
+
string var_103_dtype_0 = const()[name = string("op_103_dtype_0"), val = string("bool")];
|
| 41 |
+
fp16 var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = fp16(-inf)];
|
| 42 |
+
tensor<bool, [1, 1, 64, 64]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_103_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_54")];
|
| 43 |
+
tensor<fp16, [1, 1, 64, 64]> attention_mask_cast_fp16 = select(a = var_104_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
|
| 44 |
+
tensor<fp16, [768, 128]> bert_encoder_embedding_hidden_mapping_in_weight_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79232)))];
|
| 45 |
+
tensor<fp16, [768]> bert_encoder_embedding_hidden_mapping_in_bias_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275904)))];
|
| 46 |
+
tensor<fp16, [1, 64, 768]> linear_0_cast_fp16 = linear(bias = bert_encoder_embedding_hidden_mapping_in_bias_to_fp16, weight = bert_encoder_embedding_hidden_mapping_in_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
|
| 47 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277504)))];
|
| 48 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1457216)))];
|
| 49 |
+
tensor<fp16, [1, 64, 768]> linear_1_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_1_cast_fp16")];
|
| 50 |
+
tensor<int32, [4]> var_143 = const()[name = string("op_143"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 51 |
+
tensor<fp16, [1, 64, 12, 64]> x_3_cast_fp16 = reshape(shape = var_143, x = linear_1_cast_fp16)[name = string("x_3_cast_fp16")];
|
| 52 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1458816)))];
|
| 53 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2638528)))];
|
| 54 |
+
tensor<fp16, [1, 64, 768]> linear_2_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_2_cast_fp16")];
|
| 55 |
+
tensor<int32, [4]> var_152 = const()[name = string("op_152"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 56 |
+
tensor<fp16, [1, 64, 12, 64]> x_7_cast_fp16 = reshape(shape = var_152, x = linear_2_cast_fp16)[name = string("x_7_cast_fp16")];
|
| 57 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2640128)))];
|
| 58 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3819840)))];
|
| 59 |
+
tensor<fp16, [1, 64, 768]> linear_3_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_3_cast_fp16")];
|
| 60 |
+
tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 61 |
+
tensor<fp16, [1, 64, 12, 64]> x_11_cast_fp16 = reshape(shape = var_161, x = linear_3_cast_fp16)[name = string("x_11_cast_fp16")];
|
| 62 |
+
tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 63 |
+
tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 64 |
+
tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 65 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = x_11_cast_fp16)[name = string("transpose_154")];
|
| 66 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_73 = transpose(perm = transpose_73_perm_0, x = x_7_cast_fp16)[name = string("transpose_155")];
|
| 67 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = x_3_cast_fp16)[name = string("transpose_156")];
|
| 68 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_73, query = transpose_72, value = transpose_74)[name = string("attention_output_1_cast_fp16")];
|
| 69 |
+
tensor<int32, [4]> attention_output_3_perm_0 = const()[name = string("attention_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 70 |
+
tensor<int32, [3]> var_167 = const()[name = string("op_167"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 71 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_3_cast_fp16 = transpose(perm = attention_output_3_perm_0, x = attention_output_1_cast_fp16)[name = string("transpose_153")];
|
| 72 |
+
tensor<fp16, [1, 64, 768]> input_9_cast_fp16 = reshape(shape = var_167, x = attention_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
|
| 73 |
+
tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3821440)))];
|
| 74 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5001152)))];
|
| 75 |
+
tensor<fp16, [1, 64, 768]> linear_4_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_4_cast_fp16")];
|
| 76 |
+
tensor<fp16, [1, 64, 768]> input_11_cast_fp16 = add(x = linear_0_cast_fp16, y = linear_4_cast_fp16)[name = string("input_11_cast_fp16")];
|
| 77 |
+
tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 78 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5002752)))];
|
| 79 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5004352)))];
|
| 80 |
+
fp16 var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = fp16(0x1p-24)];
|
| 81 |
+
tensor<fp16, [1, 64, 768]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")];
|
| 82 |
+
tensor<fp16, [2048, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16"), val = tensor<fp16, [2048, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5005952)))];
|
| 83 |
+
tensor<fp16, [2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8151744)))];
|
| 84 |
+
tensor<fp16, [1, 64, 2048]> linear_5_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_5_cast_fp16")];
|
| 85 |
+
string input_17_mode_0 = const()[name = string("input_17_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 86 |
+
tensor<fp16, [1, 64, 2048]> input_17_cast_fp16 = gelu(mode = input_17_mode_0, x = linear_5_cast_fp16)[name = string("input_17_cast_fp16")];
|
| 87 |
+
tensor<fp16, [768, 2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16"), val = tensor<fp16, [768, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8155904)))];
|
| 88 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11301696)))];
|
| 89 |
+
tensor<fp16, [1, 64, 768]> linear_6_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_6_cast_fp16")];
|
| 90 |
+
tensor<fp16, [1, 64, 768]> input_19_cast_fp16 = add(x = linear_6_cast_fp16, y = input_13_cast_fp16)[name = string("input_19_cast_fp16")];
|
| 91 |
+
tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 92 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11303296)))];
|
| 93 |
+
tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11304896)))];
|
| 94 |
+
tensor<fp16, [1, 64, 768]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_19_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
|
| 95 |
+
tensor<fp16, [1, 64, 768]> linear_7_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_7_cast_fp16")];
|
| 96 |
+
tensor<int32, [4]> var_218 = const()[name = string("op_218"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 97 |
+
tensor<fp16, [1, 64, 12, 64]> x_15_cast_fp16 = reshape(shape = var_218, x = linear_7_cast_fp16)[name = string("x_15_cast_fp16")];
|
| 98 |
+
tensor<fp16, [1, 64, 768]> linear_8_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_8_cast_fp16")];
|
| 99 |
+
tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 100 |
+
tensor<fp16, [1, 64, 12, 64]> x_19_cast_fp16 = reshape(shape = var_227, x = linear_8_cast_fp16)[name = string("x_19_cast_fp16")];
|
| 101 |
+
tensor<fp16, [1, 64, 768]> linear_9_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_9_cast_fp16")];
|
| 102 |
+
tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 103 |
+
tensor<fp16, [1, 64, 12, 64]> x_23_cast_fp16 = reshape(shape = var_236, x = linear_9_cast_fp16)[name = string("x_23_cast_fp16")];
|
| 104 |
+
tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 105 |
+
tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 106 |
+
tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 107 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_77 = transpose(perm = transpose_77_perm_0, x = x_23_cast_fp16)[name = string("transpose_150")];
|
| 108 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = x_19_cast_fp16)[name = string("transpose_151")];
|
| 109 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_75 = transpose(perm = transpose_75_perm_0, x = x_15_cast_fp16)[name = string("transpose_152")];
|
| 110 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_76, query = transpose_75, value = transpose_77)[name = string("attention_output_5_cast_fp16")];
|
| 111 |
+
tensor<int32, [4]> attention_output_7_perm_0 = const()[name = string("attention_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 112 |
+
tensor<int32, [3]> var_242 = const()[name = string("op_242"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 113 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_7_cast_fp16 = transpose(perm = attention_output_7_perm_0, x = attention_output_5_cast_fp16)[name = string("transpose_149")];
|
| 114 |
+
tensor<fp16, [1, 64, 768]> input_21_cast_fp16 = reshape(shape = var_242, x = attention_output_7_cast_fp16)[name = string("input_21_cast_fp16")];
|
| 115 |
+
tensor<fp16, [1, 64, 768]> linear_10_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_10_cast_fp16")];
|
| 116 |
+
tensor<fp16, [1, 64, 768]> input_23_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = linear_10_cast_fp16)[name = string("input_23_cast_fp16")];
|
| 117 |
+
tensor<int32, [1]> input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 118 |
+
tensor<fp16, [1, 64, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")];
|
| 119 |
+
tensor<fp16, [1, 64, 2048]> linear_11_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_11_cast_fp16")];
|
| 120 |
+
string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 121 |
+
tensor<fp16, [1, 64, 2048]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = linear_11_cast_fp16)[name = string("input_29_cast_fp16")];
|
| 122 |
+
tensor<fp16, [1, 64, 768]> linear_12_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_29_cast_fp16)[name = string("linear_12_cast_fp16")];
|
| 123 |
+
tensor<fp16, [1, 64, 768]> input_31_cast_fp16 = add(x = linear_12_cast_fp16, y = input_25_cast_fp16)[name = string("input_31_cast_fp16")];
|
| 124 |
+
tensor<int32, [1]> hidden_states_5_axes_0 = const()[name = string("hidden_states_5_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 125 |
+
tensor<fp16, [1, 64, 768]> hidden_states_5_cast_fp16 = layer_norm(axes = hidden_states_5_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
|
| 126 |
+
tensor<fp16, [1, 64, 768]> linear_13_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_13_cast_fp16")];
|
| 127 |
+
tensor<int32, [4]> var_293 = const()[name = string("op_293"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 128 |
+
tensor<fp16, [1, 64, 12, 64]> x_27_cast_fp16 = reshape(shape = var_293, x = linear_13_cast_fp16)[name = string("x_27_cast_fp16")];
|
| 129 |
+
tensor<fp16, [1, 64, 768]> linear_14_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_14_cast_fp16")];
|
| 130 |
+
tensor<int32, [4]> var_302 = const()[name = string("op_302"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 131 |
+
tensor<fp16, [1, 64, 12, 64]> x_31_cast_fp16 = reshape(shape = var_302, x = linear_14_cast_fp16)[name = string("x_31_cast_fp16")];
|
| 132 |
+
tensor<fp16, [1, 64, 768]> linear_15_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_15_cast_fp16")];
|
| 133 |
+
tensor<int32, [4]> var_311 = const()[name = string("op_311"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 134 |
+
tensor<fp16, [1, 64, 12, 64]> x_35_cast_fp16 = reshape(shape = var_311, x = linear_15_cast_fp16)[name = string("x_35_cast_fp16")];
|
| 135 |
+
tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 136 |
+
tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 137 |
+
tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 138 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = x_35_cast_fp16)[name = string("transpose_146")];
|
| 139 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_79 = transpose(perm = transpose_79_perm_0, x = x_31_cast_fp16)[name = string("transpose_147")];
|
| 140 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = x_27_cast_fp16)[name = string("transpose_148")];
|
| 141 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_79, query = transpose_78, value = transpose_80)[name = string("attention_output_9_cast_fp16")];
|
| 142 |
+
tensor<int32, [4]> attention_output_11_perm_0 = const()[name = string("attention_output_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 143 |
+
tensor<int32, [3]> var_317 = const()[name = string("op_317"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 144 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_11_cast_fp16 = transpose(perm = attention_output_11_perm_0, x = attention_output_9_cast_fp16)[name = string("transpose_145")];
|
| 145 |
+
tensor<fp16, [1, 64, 768]> input_33_cast_fp16 = reshape(shape = var_317, x = attention_output_11_cast_fp16)[name = string("input_33_cast_fp16")];
|
| 146 |
+
tensor<fp16, [1, 64, 768]> linear_16_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_16_cast_fp16")];
|
| 147 |
+
tensor<fp16, [1, 64, 768]> input_35_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = linear_16_cast_fp16)[name = string("input_35_cast_fp16")];
|
| 148 |
+
tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 149 |
+
tensor<fp16, [1, 64, 768]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")];
|
| 150 |
+
tensor<fp16, [1, 64, 2048]> linear_17_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_37_cast_fp16)[name = string("linear_17_cast_fp16")];
|
| 151 |
+
string input_41_mode_0 = const()[name = string("input_41_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 152 |
+
tensor<fp16, [1, 64, 2048]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = linear_17_cast_fp16)[name = string("input_41_cast_fp16")];
|
| 153 |
+
tensor<fp16, [1, 64, 768]> linear_18_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_18_cast_fp16")];
|
| 154 |
+
tensor<fp16, [1, 64, 768]> input_43_cast_fp16 = add(x = linear_18_cast_fp16, y = input_37_cast_fp16)[name = string("input_43_cast_fp16")];
|
| 155 |
+
tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 156 |
+
tensor<fp16, [1, 64, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_43_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
|
| 157 |
+
tensor<fp16, [1, 64, 768]> linear_19_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_19_cast_fp16")];
|
| 158 |
+
tensor<int32, [4]> var_368 = const()[name = string("op_368"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 159 |
+
tensor<fp16, [1, 64, 12, 64]> x_39_cast_fp16 = reshape(shape = var_368, x = linear_19_cast_fp16)[name = string("x_39_cast_fp16")];
|
| 160 |
+
tensor<fp16, [1, 64, 768]> linear_20_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_20_cast_fp16")];
|
| 161 |
+
tensor<int32, [4]> var_377 = const()[name = string("op_377"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 162 |
+
tensor<fp16, [1, 64, 12, 64]> x_43_cast_fp16 = reshape(shape = var_377, x = linear_20_cast_fp16)[name = string("x_43_cast_fp16")];
|
| 163 |
+
tensor<fp16, [1, 64, 768]> linear_21_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_21_cast_fp16")];
|
| 164 |
+
tensor<int32, [4]> var_386 = const()[name = string("op_386"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 165 |
+
tensor<fp16, [1, 64, 12, 64]> x_47_cast_fp16 = reshape(shape = var_386, x = linear_21_cast_fp16)[name = string("x_47_cast_fp16")];
|
| 166 |
+
tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 167 |
+
tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 168 |
+
tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 169 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_83 = transpose(perm = transpose_83_perm_0, x = x_47_cast_fp16)[name = string("transpose_142")];
|
| 170 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = x_43_cast_fp16)[name = string("transpose_143")];
|
| 171 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_81 = transpose(perm = transpose_81_perm_0, x = x_39_cast_fp16)[name = string("transpose_144")];
|
| 172 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_82, query = transpose_81, value = transpose_83)[name = string("attention_output_13_cast_fp16")];
|
| 173 |
+
tensor<int32, [4]> attention_output_15_perm_0 = const()[name = string("attention_output_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 174 |
+
tensor<int32, [3]> var_392 = const()[name = string("op_392"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 175 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_15_cast_fp16 = transpose(perm = attention_output_15_perm_0, x = attention_output_13_cast_fp16)[name = string("transpose_141")];
|
| 176 |
+
tensor<fp16, [1, 64, 768]> input_45_cast_fp16 = reshape(shape = var_392, x = attention_output_15_cast_fp16)[name = string("input_45_cast_fp16")];
|
| 177 |
+
tensor<fp16, [1, 64, 768]> linear_22_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_22_cast_fp16")];
|
| 178 |
+
tensor<fp16, [1, 64, 768]> input_47_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = linear_22_cast_fp16)[name = string("input_47_cast_fp16")];
|
| 179 |
+
tensor<int32, [1]> input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 180 |
+
tensor<fp16, [1, 64, 768]> input_49_cast_fp16 = layer_norm(axes = input_49_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
|
| 181 |
+
tensor<fp16, [1, 64, 2048]> linear_23_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_49_cast_fp16)[name = string("linear_23_cast_fp16")];
|
| 182 |
+
string input_53_mode_0 = const()[name = string("input_53_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 183 |
+
tensor<fp16, [1, 64, 2048]> input_53_cast_fp16 = gelu(mode = input_53_mode_0, x = linear_23_cast_fp16)[name = string("input_53_cast_fp16")];
|
| 184 |
+
tensor<fp16, [1, 64, 768]> linear_24_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_53_cast_fp16)[name = string("linear_24_cast_fp16")];
|
| 185 |
+
tensor<fp16, [1, 64, 768]> input_55_cast_fp16 = add(x = linear_24_cast_fp16, y = input_49_cast_fp16)[name = string("input_55_cast_fp16")];
|
| 186 |
+
tensor<int32, [1]> hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 187 |
+
tensor<fp16, [1, 64, 768]> hidden_states_9_cast_fp16 = layer_norm(axes = hidden_states_9_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_55_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
|
| 188 |
+
tensor<fp16, [1, 64, 768]> linear_25_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_25_cast_fp16")];
|
| 189 |
+
tensor<int32, [4]> var_443 = const()[name = string("op_443"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 190 |
+
tensor<fp16, [1, 64, 12, 64]> x_51_cast_fp16 = reshape(shape = var_443, x = linear_25_cast_fp16)[name = string("x_51_cast_fp16")];
|
| 191 |
+
tensor<fp16, [1, 64, 768]> linear_26_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_26_cast_fp16")];
|
| 192 |
+
tensor<int32, [4]> var_452 = const()[name = string("op_452"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 193 |
+
tensor<fp16, [1, 64, 12, 64]> x_55_cast_fp16 = reshape(shape = var_452, x = linear_26_cast_fp16)[name = string("x_55_cast_fp16")];
|
| 194 |
+
tensor<fp16, [1, 64, 768]> linear_27_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_27_cast_fp16")];
|
| 195 |
+
tensor<int32, [4]> var_461 = const()[name = string("op_461"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 196 |
+
tensor<fp16, [1, 64, 12, 64]> x_59_cast_fp16 = reshape(shape = var_461, x = linear_27_cast_fp16)[name = string("x_59_cast_fp16")];
|
| 197 |
+
tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 198 |
+
tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 199 |
+
tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 200 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = x_59_cast_fp16)[name = string("transpose_138")];
|
| 201 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_85 = transpose(perm = transpose_85_perm_0, x = x_55_cast_fp16)[name = string("transpose_139")];
|
| 202 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = x_51_cast_fp16)[name = string("transpose_140")];
|
| 203 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_85, query = transpose_84, value = transpose_86)[name = string("attention_output_17_cast_fp16")];
|
| 204 |
+
tensor<int32, [4]> attention_output_19_perm_0 = const()[name = string("attention_output_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 205 |
+
tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 206 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_19_cast_fp16 = transpose(perm = attention_output_19_perm_0, x = attention_output_17_cast_fp16)[name = string("transpose_137")];
|
| 207 |
+
tensor<fp16, [1, 64, 768]> input_57_cast_fp16 = reshape(shape = var_467, x = attention_output_19_cast_fp16)[name = string("input_57_cast_fp16")];
|
| 208 |
+
tensor<fp16, [1, 64, 768]> linear_28_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_28_cast_fp16")];
|
| 209 |
+
tensor<fp16, [1, 64, 768]> input_59_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_28_cast_fp16)[name = string("input_59_cast_fp16")];
|
| 210 |
+
tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 211 |
+
tensor<fp16, [1, 64, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")];
|
| 212 |
+
tensor<fp16, [1, 64, 2048]> linear_29_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_61_cast_fp16)[name = string("linear_29_cast_fp16")];
|
| 213 |
+
string input_65_mode_0 = const()[name = string("input_65_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 214 |
+
tensor<fp16, [1, 64, 2048]> input_65_cast_fp16 = gelu(mode = input_65_mode_0, x = linear_29_cast_fp16)[name = string("input_65_cast_fp16")];
|
| 215 |
+
tensor<fp16, [1, 64, 768]> linear_30_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_65_cast_fp16)[name = string("linear_30_cast_fp16")];
|
| 216 |
+
tensor<fp16, [1, 64, 768]> input_67_cast_fp16 = add(x = linear_30_cast_fp16, y = input_61_cast_fp16)[name = string("input_67_cast_fp16")];
|
| 217 |
+
tensor<int32, [1]> hidden_states_11_axes_0 = const()[name = string("hidden_states_11_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 218 |
+
tensor<fp16, [1, 64, 768]> hidden_states_11_cast_fp16 = layer_norm(axes = hidden_states_11_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
|
| 219 |
+
tensor<fp16, [1, 64, 768]> linear_31_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_31_cast_fp16")];
|
| 220 |
+
tensor<int32, [4]> var_518 = const()[name = string("op_518"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 221 |
+
tensor<fp16, [1, 64, 12, 64]> x_63_cast_fp16 = reshape(shape = var_518, x = linear_31_cast_fp16)[name = string("x_63_cast_fp16")];
|
| 222 |
+
tensor<fp16, [1, 64, 768]> linear_32_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_32_cast_fp16")];
|
| 223 |
+
tensor<int32, [4]> var_527 = const()[name = string("op_527"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 224 |
+
tensor<fp16, [1, 64, 12, 64]> x_67_cast_fp16 = reshape(shape = var_527, x = linear_32_cast_fp16)[name = string("x_67_cast_fp16")];
|
| 225 |
+
tensor<fp16, [1, 64, 768]> linear_33_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_33_cast_fp16")];
|
| 226 |
+
tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 227 |
+
tensor<fp16, [1, 64, 12, 64]> x_71_cast_fp16 = reshape(shape = var_536, x = linear_33_cast_fp16)[name = string("x_71_cast_fp16")];
|
| 228 |
+
tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 229 |
+
tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 230 |
+
tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 231 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_89 = transpose(perm = transpose_89_perm_0, x = x_71_cast_fp16)[name = string("transpose_134")];
|
| 232 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = x_67_cast_fp16)[name = string("transpose_135")];
|
| 233 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_87 = transpose(perm = transpose_87_perm_0, x = x_63_cast_fp16)[name = string("transpose_136")];
|
| 234 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_88, query = transpose_87, value = transpose_89)[name = string("attention_output_21_cast_fp16")];
|
| 235 |
+
tensor<int32, [4]> attention_output_23_perm_0 = const()[name = string("attention_output_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 236 |
+
tensor<int32, [3]> var_542 = const()[name = string("op_542"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 237 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_23_cast_fp16 = transpose(perm = attention_output_23_perm_0, x = attention_output_21_cast_fp16)[name = string("transpose_133")];
|
| 238 |
+
tensor<fp16, [1, 64, 768]> input_69_cast_fp16 = reshape(shape = var_542, x = attention_output_23_cast_fp16)[name = string("input_69_cast_fp16")];
|
| 239 |
+
tensor<fp16, [1, 64, 768]> linear_34_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_34_cast_fp16")];
|
| 240 |
+
tensor<fp16, [1, 64, 768]> input_71_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = linear_34_cast_fp16)[name = string("input_71_cast_fp16")];
|
| 241 |
+
tensor<int32, [1]> input_73_axes_0 = const()[name = string("input_73_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 242 |
+
tensor<fp16, [1, 64, 768]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("input_73_cast_fp16")];
|
| 243 |
+
tensor<fp16, [1, 64, 2048]> linear_35_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_73_cast_fp16)[name = string("linear_35_cast_fp16")];
|
| 244 |
+
string input_77_mode_0 = const()[name = string("input_77_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 245 |
+
tensor<fp16, [1, 64, 2048]> input_77_cast_fp16 = gelu(mode = input_77_mode_0, x = linear_35_cast_fp16)[name = string("input_77_cast_fp16")];
|
| 246 |
+
tensor<fp16, [1, 64, 768]> linear_36_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_77_cast_fp16)[name = string("linear_36_cast_fp16")];
|
| 247 |
+
tensor<fp16, [1, 64, 768]> input_79_cast_fp16 = add(x = linear_36_cast_fp16, y = input_73_cast_fp16)[name = string("input_79_cast_fp16")];
|
| 248 |
+
tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 249 |
+
tensor<fp16, [1, 64, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_79_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
|
| 250 |
+
tensor<fp16, [1, 64, 768]> linear_37_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_37_cast_fp16")];
|
| 251 |
+
tensor<int32, [4]> var_593 = const()[name = string("op_593"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 252 |
+
tensor<fp16, [1, 64, 12, 64]> x_75_cast_fp16 = reshape(shape = var_593, x = linear_37_cast_fp16)[name = string("x_75_cast_fp16")];
|
| 253 |
+
tensor<fp16, [1, 64, 768]> linear_38_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_38_cast_fp16")];
|
| 254 |
+
tensor<int32, [4]> var_602 = const()[name = string("op_602"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 255 |
+
tensor<fp16, [1, 64, 12, 64]> x_79_cast_fp16 = reshape(shape = var_602, x = linear_38_cast_fp16)[name = string("x_79_cast_fp16")];
|
| 256 |
+
tensor<fp16, [1, 64, 768]> linear_39_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_39_cast_fp16")];
|
| 257 |
+
tensor<int32, [4]> var_611 = const()[name = string("op_611"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 258 |
+
tensor<fp16, [1, 64, 12, 64]> x_83_cast_fp16 = reshape(shape = var_611, x = linear_39_cast_fp16)[name = string("x_83_cast_fp16")];
|
| 259 |
+
tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 260 |
+
tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 261 |
+
tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 262 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = x_83_cast_fp16)[name = string("transpose_130")];
|
| 263 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_91 = transpose(perm = transpose_91_perm_0, x = x_79_cast_fp16)[name = string("transpose_131")];
|
| 264 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = x_75_cast_fp16)[name = string("transpose_132")];
|
| 265 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_91, query = transpose_90, value = transpose_92)[name = string("attention_output_25_cast_fp16")];
|
| 266 |
+
tensor<int32, [4]> attention_output_27_perm_0 = const()[name = string("attention_output_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 267 |
+
tensor<int32, [3]> var_617 = const()[name = string("op_617"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 268 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_27_cast_fp16 = transpose(perm = attention_output_27_perm_0, x = attention_output_25_cast_fp16)[name = string("transpose_129")];
|
| 269 |
+
tensor<fp16, [1, 64, 768]> input_81_cast_fp16 = reshape(shape = var_617, x = attention_output_27_cast_fp16)[name = string("input_81_cast_fp16")];
|
| 270 |
+
tensor<fp16, [1, 64, 768]> linear_40_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_40_cast_fp16")];
|
| 271 |
+
tensor<fp16, [1, 64, 768]> input_83_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = linear_40_cast_fp16)[name = string("input_83_cast_fp16")];
|
| 272 |
+
tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 273 |
+
tensor<fp16, [1, 64, 768]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("input_85_cast_fp16")];
|
| 274 |
+
tensor<fp16, [1, 64, 2048]> linear_41_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_85_cast_fp16)[name = string("linear_41_cast_fp16")];
|
| 275 |
+
string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 276 |
+
tensor<fp16, [1, 64, 2048]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = linear_41_cast_fp16)[name = string("input_89_cast_fp16")];
|
| 277 |
+
tensor<fp16, [1, 64, 768]> linear_42_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_89_cast_fp16)[name = string("linear_42_cast_fp16")];
|
| 278 |
+
tensor<fp16, [1, 64, 768]> input_91_cast_fp16 = add(x = linear_42_cast_fp16, y = input_85_cast_fp16)[name = string("input_91_cast_fp16")];
|
| 279 |
+
tensor<int32, [1]> hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 280 |
+
tensor<fp16, [1, 64, 768]> hidden_states_15_cast_fp16 = layer_norm(axes = hidden_states_15_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_91_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
|
| 281 |
+
tensor<fp16, [1, 64, 768]> linear_43_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_43_cast_fp16")];
|
| 282 |
+
tensor<int32, [4]> var_668 = const()[name = string("op_668"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 283 |
+
tensor<fp16, [1, 64, 12, 64]> x_87_cast_fp16 = reshape(shape = var_668, x = linear_43_cast_fp16)[name = string("x_87_cast_fp16")];
|
| 284 |
+
tensor<fp16, [1, 64, 768]> linear_44_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_44_cast_fp16")];
|
| 285 |
+
tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 286 |
+
tensor<fp16, [1, 64, 12, 64]> x_91_cast_fp16 = reshape(shape = var_677, x = linear_44_cast_fp16)[name = string("x_91_cast_fp16")];
|
| 287 |
+
tensor<fp16, [1, 64, 768]> linear_45_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_45_cast_fp16")];
|
| 288 |
+
tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 289 |
+
tensor<fp16, [1, 64, 12, 64]> x_95_cast_fp16 = reshape(shape = var_686, x = linear_45_cast_fp16)[name = string("x_95_cast_fp16")];
|
| 290 |
+
tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 291 |
+
tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 292 |
+
tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 293 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_95 = transpose(perm = transpose_95_perm_0, x = x_95_cast_fp16)[name = string("transpose_126")];
|
| 294 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = x_91_cast_fp16)[name = string("transpose_127")];
|
| 295 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_93 = transpose(perm = transpose_93_perm_0, x = x_87_cast_fp16)[name = string("transpose_128")];
|
| 296 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_94, query = transpose_93, value = transpose_95)[name = string("attention_output_29_cast_fp16")];
|
| 297 |
+
tensor<int32, [4]> attention_output_31_perm_0 = const()[name = string("attention_output_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 298 |
+
tensor<int32, [3]> var_692 = const()[name = string("op_692"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 299 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_31_cast_fp16 = transpose(perm = attention_output_31_perm_0, x = attention_output_29_cast_fp16)[name = string("transpose_125")];
|
| 300 |
+
tensor<fp16, [1, 64, 768]> input_93_cast_fp16 = reshape(shape = var_692, x = attention_output_31_cast_fp16)[name = string("input_93_cast_fp16")];
|
| 301 |
+
tensor<fp16, [1, 64, 768]> linear_46_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_46_cast_fp16")];
|
| 302 |
+
tensor<fp16, [1, 64, 768]> input_95_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = linear_46_cast_fp16)[name = string("input_95_cast_fp16")];
|
| 303 |
+
tensor<int32, [1]> input_97_axes_0 = const()[name = string("input_97_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 304 |
+
tensor<fp16, [1, 64, 768]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_95_cast_fp16)[name = string("input_97_cast_fp16")];
|
| 305 |
+
tensor<fp16, [1, 64, 2048]> linear_47_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_97_cast_fp16)[name = string("linear_47_cast_fp16")];
|
| 306 |
+
string input_101_mode_0 = const()[name = string("input_101_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 307 |
+
tensor<fp16, [1, 64, 2048]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = linear_47_cast_fp16)[name = string("input_101_cast_fp16")];
|
| 308 |
+
tensor<fp16, [1, 64, 768]> linear_48_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_101_cast_fp16)[name = string("linear_48_cast_fp16")];
|
| 309 |
+
tensor<fp16, [1, 64, 768]> input_103_cast_fp16 = add(x = linear_48_cast_fp16, y = input_97_cast_fp16)[name = string("input_103_cast_fp16")];
|
| 310 |
+
tensor<int32, [1]> hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 311 |
+
tensor<fp16, [1, 64, 768]> hidden_states_17_cast_fp16 = layer_norm(axes = hidden_states_17_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
|
| 312 |
+
tensor<fp16, [1, 64, 768]> linear_49_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_49_cast_fp16")];
|
| 313 |
+
tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 314 |
+
tensor<fp16, [1, 64, 12, 64]> x_99_cast_fp16 = reshape(shape = var_743, x = linear_49_cast_fp16)[name = string("x_99_cast_fp16")];
|
| 315 |
+
tensor<fp16, [1, 64, 768]> linear_50_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_50_cast_fp16")];
|
| 316 |
+
tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 317 |
+
tensor<fp16, [1, 64, 12, 64]> x_103_cast_fp16 = reshape(shape = var_752, x = linear_50_cast_fp16)[name = string("x_103_cast_fp16")];
|
| 318 |
+
tensor<fp16, [1, 64, 768]> linear_51_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_51_cast_fp16")];
|
| 319 |
+
tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 320 |
+
tensor<fp16, [1, 64, 12, 64]> x_107_cast_fp16 = reshape(shape = var_761, x = linear_51_cast_fp16)[name = string("x_107_cast_fp16")];
|
| 321 |
+
tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 322 |
+
tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 323 |
+
tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 324 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_98 = transpose(perm = transpose_98_perm_0, x = x_107_cast_fp16)[name = string("transpose_122")];
|
| 325 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_97 = transpose(perm = transpose_97_perm_0, x = x_103_cast_fp16)[name = string("transpose_123")];
|
| 326 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = x_99_cast_fp16)[name = string("transpose_124")];
|
| 327 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_97, query = transpose_96, value = transpose_98)[name = string("attention_output_33_cast_fp16")];
|
| 328 |
+
tensor<int32, [4]> attention_output_35_perm_0 = const()[name = string("attention_output_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 329 |
+
tensor<int32, [3]> var_767 = const()[name = string("op_767"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 330 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_35_cast_fp16 = transpose(perm = attention_output_35_perm_0, x = attention_output_33_cast_fp16)[name = string("transpose_121")];
|
| 331 |
+
tensor<fp16, [1, 64, 768]> input_105_cast_fp16 = reshape(shape = var_767, x = attention_output_35_cast_fp16)[name = string("input_105_cast_fp16")];
|
| 332 |
+
tensor<fp16, [1, 64, 768]> linear_52_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_52_cast_fp16")];
|
| 333 |
+
tensor<fp16, [1, 64, 768]> input_107_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_52_cast_fp16)[name = string("input_107_cast_fp16")];
|
| 334 |
+
tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 335 |
+
tensor<fp16, [1, 64, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
|
| 336 |
+
tensor<fp16, [1, 64, 2048]> linear_53_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_109_cast_fp16)[name = string("linear_53_cast_fp16")];
|
| 337 |
+
string input_113_mode_0 = const()[name = string("input_113_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 338 |
+
tensor<fp16, [1, 64, 2048]> input_113_cast_fp16 = gelu(mode = input_113_mode_0, x = linear_53_cast_fp16)[name = string("input_113_cast_fp16")];
|
| 339 |
+
tensor<fp16, [1, 64, 768]> linear_54_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_113_cast_fp16)[name = string("linear_54_cast_fp16")];
|
| 340 |
+
tensor<fp16, [1, 64, 768]> input_115_cast_fp16 = add(x = linear_54_cast_fp16, y = input_109_cast_fp16)[name = string("input_115_cast_fp16")];
|
| 341 |
+
tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 342 |
+
tensor<fp16, [1, 64, 768]> hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_115_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
|
| 343 |
+
tensor<fp16, [1, 64, 768]> linear_55_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_55_cast_fp16")];
|
| 344 |
+
tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 345 |
+
tensor<fp16, [1, 64, 12, 64]> x_111_cast_fp16 = reshape(shape = var_818, x = linear_55_cast_fp16)[name = string("x_111_cast_fp16")];
|
| 346 |
+
tensor<fp16, [1, 64, 768]> linear_56_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_56_cast_fp16")];
|
| 347 |
+
tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 348 |
+
tensor<fp16, [1, 64, 12, 64]> x_115_cast_fp16 = reshape(shape = var_827, x = linear_56_cast_fp16)[name = string("x_115_cast_fp16")];
|
| 349 |
+
tensor<fp16, [1, 64, 768]> linear_57_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_57_cast_fp16")];
|
| 350 |
+
tensor<int32, [4]> var_836 = const()[name = string("op_836"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 351 |
+
tensor<fp16, [1, 64, 12, 64]> x_119_cast_fp16 = reshape(shape = var_836, x = linear_57_cast_fp16)[name = string("x_119_cast_fp16")];
|
| 352 |
+
tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 353 |
+
tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 354 |
+
tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 355 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_101 = transpose(perm = transpose_101_perm_0, x = x_119_cast_fp16)[name = string("transpose_118")];
|
| 356 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = x_115_cast_fp16)[name = string("transpose_119")];
|
| 357 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_99 = transpose(perm = transpose_99_perm_0, x = x_111_cast_fp16)[name = string("transpose_120")];
|
| 358 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_100, query = transpose_99, value = transpose_101)[name = string("attention_output_37_cast_fp16")];
|
| 359 |
+
tensor<int32, [4]> attention_output_39_perm_0 = const()[name = string("attention_output_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 360 |
+
tensor<int32, [3]> var_842 = const()[name = string("op_842"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 361 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_39_cast_fp16 = transpose(perm = attention_output_39_perm_0, x = attention_output_37_cast_fp16)[name = string("transpose_117")];
|
| 362 |
+
tensor<fp16, [1, 64, 768]> input_117_cast_fp16 = reshape(shape = var_842, x = attention_output_39_cast_fp16)[name = string("input_117_cast_fp16")];
|
| 363 |
+
tensor<fp16, [1, 64, 768]> linear_58_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_58_cast_fp16")];
|
| 364 |
+
tensor<fp16, [1, 64, 768]> input_119_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_58_cast_fp16)[name = string("input_119_cast_fp16")];
|
| 365 |
+
tensor<int32, [1]> input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 366 |
+
tensor<fp16, [1, 64, 768]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("input_121_cast_fp16")];
|
| 367 |
+
tensor<fp16, [1, 64, 2048]> linear_59_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_121_cast_fp16)[name = string("linear_59_cast_fp16")];
|
| 368 |
+
string input_125_mode_0 = const()[name = string("input_125_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 369 |
+
tensor<fp16, [1, 64, 2048]> input_125_cast_fp16 = gelu(mode = input_125_mode_0, x = linear_59_cast_fp16)[name = string("input_125_cast_fp16")];
|
| 370 |
+
tensor<fp16, [1, 64, 768]> linear_60_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_125_cast_fp16)[name = string("linear_60_cast_fp16")];
|
| 371 |
+
tensor<fp16, [1, 64, 768]> input_127_cast_fp16 = add(x = linear_60_cast_fp16, y = input_121_cast_fp16)[name = string("input_127_cast_fp16")];
|
| 372 |
+
tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 373 |
+
tensor<fp16, [1, 64, 768]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_127_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
|
| 374 |
+
tensor<fp16, [1, 64, 768]> linear_61_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_61_cast_fp16")];
|
| 375 |
+
tensor<int32, [4]> var_893 = const()[name = string("op_893"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 376 |
+
tensor<fp16, [1, 64, 12, 64]> x_123_cast_fp16 = reshape(shape = var_893, x = linear_61_cast_fp16)[name = string("x_123_cast_fp16")];
|
| 377 |
+
tensor<fp16, [1, 64, 768]> linear_62_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_62_cast_fp16")];
|
| 378 |
+
tensor<int32, [4]> var_902 = const()[name = string("op_902"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 379 |
+
tensor<fp16, [1, 64, 12, 64]> x_127_cast_fp16 = reshape(shape = var_902, x = linear_62_cast_fp16)[name = string("x_127_cast_fp16")];
|
| 380 |
+
tensor<fp16, [1, 64, 768]> linear_63_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_63_cast_fp16")];
|
| 381 |
+
tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 382 |
+
tensor<fp16, [1, 64, 12, 64]> x_131_cast_fp16 = reshape(shape = var_911, x = linear_63_cast_fp16)[name = string("x_131_cast_fp16")];
|
| 383 |
+
tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 384 |
+
tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 385 |
+
tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 386 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = x_131_cast_fp16)[name = string("transpose_114")];
|
| 387 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_103 = transpose(perm = transpose_103_perm_0, x = x_127_cast_fp16)[name = string("transpose_115")];
|
| 388 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_102 = transpose(perm = transpose_102_perm_0, x = x_123_cast_fp16)[name = string("transpose_116")];
|
| 389 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_103, query = transpose_102, value = transpose_104)[name = string("attention_output_41_cast_fp16")];
|
| 390 |
+
tensor<int32, [4]> attention_output_43_perm_0 = const()[name = string("attention_output_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 391 |
+
tensor<int32, [3]> var_917 = const()[name = string("op_917"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 392 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_43_cast_fp16 = transpose(perm = attention_output_43_perm_0, x = attention_output_41_cast_fp16)[name = string("transpose_113")];
|
| 393 |
+
tensor<fp16, [1, 64, 768]> input_129_cast_fp16 = reshape(shape = var_917, x = attention_output_43_cast_fp16)[name = string("input_129_cast_fp16")];
|
| 394 |
+
tensor<fp16, [1, 64, 768]> linear_64_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_64_cast_fp16")];
|
| 395 |
+
tensor<fp16, [1, 64, 768]> input_131_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = linear_64_cast_fp16)[name = string("input_131_cast_fp16")];
|
| 396 |
+
tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 397 |
+
tensor<fp16, [1, 64, 768]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_131_cast_fp16)[name = string("input_133_cast_fp16")];
|
| 398 |
+
tensor<fp16, [1, 64, 2048]> linear_65_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_133_cast_fp16)[name = string("linear_65_cast_fp16")];
|
| 399 |
+
string input_137_mode_0 = const()[name = string("input_137_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 400 |
+
tensor<fp16, [1, 64, 2048]> input_137_cast_fp16 = gelu(mode = input_137_mode_0, x = linear_65_cast_fp16)[name = string("input_137_cast_fp16")];
|
| 401 |
+
tensor<fp16, [1, 64, 768]> linear_66_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_137_cast_fp16)[name = string("linear_66_cast_fp16")];
|
| 402 |
+
tensor<fp16, [1, 64, 768]> input_139_cast_fp16 = add(x = linear_66_cast_fp16, y = input_133_cast_fp16)[name = string("input_139_cast_fp16")];
|
| 403 |
+
tensor<int32, [1]> hidden_states_axes_0 = const()[name = string("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 404 |
+
tensor<fp16, [1, 64, 768]> hidden_states_cast_fp16 = layer_norm(axes = hidden_states_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_139_cast_fp16)[name = string("hidden_states_cast_fp16")];
|
| 405 |
+
tensor<fp16, [1, 64, 768]> linear_67_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_67_cast_fp16")];
|
| 406 |
+
tensor<int32, [4]> var_968 = const()[name = string("op_968"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 407 |
+
tensor<fp16, [1, 64, 12, 64]> x_135_cast_fp16 = reshape(shape = var_968, x = linear_67_cast_fp16)[name = string("x_135_cast_fp16")];
|
| 408 |
+
tensor<fp16, [1, 64, 768]> linear_68_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_68_cast_fp16")];
|
| 409 |
+
tensor<int32, [4]> var_977 = const()[name = string("op_977"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 410 |
+
tensor<fp16, [1, 64, 12, 64]> x_139_cast_fp16 = reshape(shape = var_977, x = linear_68_cast_fp16)[name = string("x_139_cast_fp16")];
|
| 411 |
+
tensor<fp16, [1, 64, 768]> linear_69_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_69_cast_fp16")];
|
| 412 |
+
tensor<int32, [4]> var_986 = const()[name = string("op_986"), val = tensor<int32, [4]>([1, 64, 12, 64])];
|
| 413 |
+
tensor<fp16, [1, 64, 12, 64]> x_cast_fp16 = reshape(shape = var_986, x = linear_69_cast_fp16)[name = string("x_cast_fp16")];
|
| 414 |
+
tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 415 |
+
tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 416 |
+
tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 417 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_107 = transpose(perm = transpose_107_perm_0, x = x_cast_fp16)[name = string("transpose_110")];
|
| 418 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_106 = transpose(perm = transpose_106_perm_0, x = x_139_cast_fp16)[name = string("transpose_111")];
|
| 419 |
+
tensor<fp16, [1, 12, 64, 64]> transpose_105 = transpose(perm = transpose_105_perm_0, x = x_135_cast_fp16)[name = string("transpose_112")];
|
| 420 |
+
tensor<fp16, [1, 12, 64, 64]> attention_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_106, query = transpose_105, value = transpose_107)[name = string("attention_output_45_cast_fp16")];
|
| 421 |
+
tensor<int32, [4]> attention_output_perm_0 = const()[name = string("attention_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 422 |
+
tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([1, 64, 768])];
|
| 423 |
+
tensor<fp16, [1, 64, 12, 64]> attention_output_cast_fp16 = transpose(perm = attention_output_perm_0, x = attention_output_45_cast_fp16)[name = string("transpose_109")];
|
| 424 |
+
tensor<fp16, [1, 64, 768]> input_141_cast_fp16 = reshape(shape = var_992, x = attention_output_cast_fp16)[name = string("input_141_cast_fp16")];
|
| 425 |
+
tensor<fp16, [1, 64, 768]> linear_70_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_70_cast_fp16")];
|
| 426 |
+
tensor<fp16, [1, 64, 768]> input_143_cast_fp16 = add(x = hidden_states_cast_fp16, y = linear_70_cast_fp16)[name = string("input_143_cast_fp16")];
|
| 427 |
+
tensor<int32, [1]> input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 428 |
+
tensor<fp16, [1, 64, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")];
|
| 429 |
+
tensor<fp16, [1, 64, 2048]> linear_71_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_145_cast_fp16)[name = string("linear_71_cast_fp16")];
|
| 430 |
+
string input_149_mode_0 = const()[name = string("input_149_mode_0"), val = string("TANH_APPROXIMATION")];
|
| 431 |
+
tensor<fp16, [1, 64, 2048]> input_149_cast_fp16 = gelu(mode = input_149_mode_0, x = linear_71_cast_fp16)[name = string("input_149_cast_fp16")];
|
| 432 |
+
tensor<fp16, [1, 64, 768]> linear_72_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_149_cast_fp16)[name = string("linear_72_cast_fp16")];
|
| 433 |
+
tensor<fp16, [1, 64, 768]> input_151_cast_fp16 = add(x = linear_72_cast_fp16, y = input_145_cast_fp16)[name = string("input_151_cast_fp16")];
|
| 434 |
+
tensor<int32, [1]> sequence_output_axes_0 = const()[name = string("sequence_output_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 435 |
+
tensor<fp16, [1, 64, 768]> sequence_output = layer_norm(axes = sequence_output_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_151_cast_fp16)[name = string("sequence_output_cast_fp16")];
|
| 436 |
+
tensor<fp16, [512, 768]> bert_encoder_weight_to_fp16 = const()[name = string("bert_encoder_weight_to_fp16"), val = tensor<fp16, [512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11306496)))];
|
| 437 |
+
tensor<fp16, [512]> bert_encoder_bias_to_fp16 = const()[name = string("bert_encoder_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12092992)))];
|
| 438 |
+
tensor<fp16, [1, 64, 512]> linear_73_cast_fp16 = linear(bias = bert_encoder_bias_to_fp16, weight = bert_encoder_weight_to_fp16, x = sequence_output)[name = string("linear_73_cast_fp16")];
|
| 439 |
+
tensor<int32, [3]> var_1030_perm_0 = const()[name = string("op_1030_perm_0"), val = tensor<int32, [3]>([0, -1, -2])];
|
| 440 |
+
tensor<fp16, [1, 512, 64]> var_1030 = transpose(perm = var_1030_perm_0, x = linear_73_cast_fp16)[name = string("transpose_108")];
|
| 441 |
+
} -> (sequence_output, var_1030);
|
| 442 |
+
}
|
iteration_3/compiled/bert_fp16_t64.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:629b75e73fbceeb89e92b0b85548bde59918208b424b8d6467202d72d82629b2
|
| 3 |
+
size 12094080
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc3f372a74ac4a7096282d6c8b15aca657091562cd9968823195b79b46634951
|
| 3 |
+
size 243
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c248985ef0758e1fdb436873593f6c37e0a645295dbdcda5bdc8f1c0b6d9efe8
|
| 3 |
+
size 462
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 256)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 1, 256]",
|
| 13 |
+
"name" : "var_6225",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"modelParameters" : [
|
| 18 |
+
|
| 19 |
+
],
|
| 20 |
+
"specificationVersion" : 9,
|
| 21 |
+
"mlProgramOperationTypeHistogram" : {
|
| 22 |
+
"Ios18.expandDims" : 16,
|
| 23 |
+
"Ios18.softmax" : 24,
|
| 24 |
+
"Ios18.mul" : 117,
|
| 25 |
+
"Ios18.matmul" : 48,
|
| 26 |
+
"Ios16.reduceMean" : 8,
|
| 27 |
+
"Split" : 72,
|
| 28 |
+
"Tile" : 16,
|
| 29 |
+
"Ios18.add" : 188,
|
| 30 |
+
"Ios16.reduceSum" : 8,
|
| 31 |
+
"Ios18.layerNorm" : 24,
|
| 32 |
+
"Ios18.reshape" : 102,
|
| 33 |
+
"Ios18.linear" : 143,
|
| 34 |
+
"Ios18.conv" : 8,
|
| 35 |
+
"Ios18.gelu" : 41,
|
| 36 |
+
"Ios18.sub" : 8,
|
| 37 |
+
"Ios18.concat" : 8,
|
| 38 |
+
"Stack" : 8,
|
| 39 |
+
"Ios18.transpose" : 216,
|
| 40 |
+
"Ios18.cast" : 4,
|
| 41 |
+
"Ios18.sliceByIndex" : 4
|
| 42 |
+
},
|
| 43 |
+
"computePrecision" : "Mixed (Float16, Int32)",
|
| 44 |
+
"isUpdatable" : "0",
|
| 45 |
+
"stateSchema" : [
|
| 46 |
+
|
| 47 |
+
],
|
| 48 |
+
"availability" : {
|
| 49 |
+
"macOS" : "15.0",
|
| 50 |
+
"tvOS" : "18.0",
|
| 51 |
+
"visionOS" : "2.0",
|
| 52 |
+
"watchOS" : "11.0",
|
| 53 |
+
"iOS" : "18.0",
|
| 54 |
+
"macCatalyst" : "18.0"
|
| 55 |
+
},
|
| 56 |
+
"modelType" : {
|
| 57 |
+
"name" : "MLModelType_mlProgram"
|
| 58 |
+
},
|
| 59 |
+
"userDefinedMetadata" : {
|
| 60 |
+
"com.github.apple.coremltools.conversion_date" : "2026-05-08",
|
| 61 |
+
"com.github.apple.coremltools.source" : "torch==2.11.0",
|
| 62 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 63 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 64 |
+
},
|
| 65 |
+
"inputSchema" : [
|
| 66 |
+
{
|
| 67 |
+
"hasShapeFlexibility" : "0",
|
| 68 |
+
"isOptional" : "0",
|
| 69 |
+
"dataType" : "Float32",
|
| 70 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 256)",
|
| 71 |
+
"shortDescription" : "",
|
| 72 |
+
"shape" : "[1, 1, 256]",
|
| 73 |
+
"name" : "noise_init",
|
| 74 |
+
"type" : "MultiArray"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"hasShapeFlexibility" : "0",
|
| 78 |
+
"isOptional" : "0",
|
| 79 |
+
"dataType" : "Float32",
|
| 80 |
+
"formattedType" : "MultiArray (Float32 4 × 1 × 1 × 256)",
|
| 81 |
+
"shortDescription" : "",
|
| 82 |
+
"shape" : "[4, 1, 1, 256]",
|
| 83 |
+
"name" : "noises_aux",
|
| 84 |
+
"type" : "MultiArray"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"hasShapeFlexibility" : "0",
|
| 88 |
+
"isOptional" : "0",
|
| 89 |
+
"dataType" : "Float32",
|
| 90 |
+
"formattedType" : "MultiArray (Float32 1 × 128 × 768)",
|
| 91 |
+
"shortDescription" : "",
|
| 92 |
+
"shape" : "[1, 128, 768]",
|
| 93 |
+
"name" : "embedding",
|
| 94 |
+
"type" : "MultiArray"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"hasShapeFlexibility" : "0",
|
| 98 |
+
"isOptional" : "0",
|
| 99 |
+
"dataType" : "Float32",
|
| 100 |
+
"formattedType" : "MultiArray (Float32 1 × 256)",
|
| 101 |
+
"shortDescription" : "",
|
| 102 |
+
"shape" : "[1, 256]",
|
| 103 |
+
"name" : "features",
|
| 104 |
+
"type" : "MultiArray"
|
| 105 |
+
}
|
| 106 |
+
],
|
| 107 |
+
"generatedClassName" : "fused_diffusion_sampler_fp16_t128",
|
| 108 |
+
"method" : "predict"
|
| 109 |
+
}
|
| 110 |
+
]
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/model.mil
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
|
| 3 |
+
size 49361856
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2455735a48a29c69ac89e1af3a8255d27c8900fc1a8a9818fa2b7482ba74ed20
|
| 3 |
+
size 243
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6a02ae96aa0d591c0718c7e108d017012d034e6511a5a9132b62d9120b1a4db
|
| 3 |
+
size 462
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 256)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 1, 256]",
|
| 13 |
+
"name" : "var_6225",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"modelParameters" : [
|
| 18 |
+
|
| 19 |
+
],
|
| 20 |
+
"specificationVersion" : 9,
|
| 21 |
+
"mlProgramOperationTypeHistogram" : {
|
| 22 |
+
"Ios18.expandDims" : 16,
|
| 23 |
+
"Ios18.softmax" : 24,
|
| 24 |
+
"Ios18.mul" : 117,
|
| 25 |
+
"Ios18.matmul" : 48,
|
| 26 |
+
"Ios16.reduceMean" : 8,
|
| 27 |
+
"Split" : 72,
|
| 28 |
+
"Tile" : 16,
|
| 29 |
+
"Ios18.add" : 188,
|
| 30 |
+
"Ios16.reduceSum" : 8,
|
| 31 |
+
"Ios18.layerNorm" : 24,
|
| 32 |
+
"Ios18.reshape" : 102,
|
| 33 |
+
"Ios18.linear" : 143,
|
| 34 |
+
"Ios18.conv" : 8,
|
| 35 |
+
"Ios18.gelu" : 41,
|
| 36 |
+
"Ios18.sub" : 8,
|
| 37 |
+
"Ios18.concat" : 8,
|
| 38 |
+
"Stack" : 8,
|
| 39 |
+
"Ios18.transpose" : 216,
|
| 40 |
+
"Ios18.cast" : 4,
|
| 41 |
+
"Ios18.sliceByIndex" : 4
|
| 42 |
+
},
|
| 43 |
+
"computePrecision" : "Mixed (Float16, Int32)",
|
| 44 |
+
"isUpdatable" : "0",
|
| 45 |
+
"stateSchema" : [
|
| 46 |
+
|
| 47 |
+
],
|
| 48 |
+
"availability" : {
|
| 49 |
+
"macOS" : "15.0",
|
| 50 |
+
"tvOS" : "18.0",
|
| 51 |
+
"visionOS" : "2.0",
|
| 52 |
+
"watchOS" : "11.0",
|
| 53 |
+
"iOS" : "18.0",
|
| 54 |
+
"macCatalyst" : "18.0"
|
| 55 |
+
},
|
| 56 |
+
"modelType" : {
|
| 57 |
+
"name" : "MLModelType_mlProgram"
|
| 58 |
+
},
|
| 59 |
+
"userDefinedMetadata" : {
|
| 60 |
+
"com.github.apple.coremltools.conversion_date" : "2026-05-08",
|
| 61 |
+
"com.github.apple.coremltools.source" : "torch==2.11.0",
|
| 62 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 63 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 64 |
+
},
|
| 65 |
+
"inputSchema" : [
|
| 66 |
+
{
|
| 67 |
+
"hasShapeFlexibility" : "0",
|
| 68 |
+
"isOptional" : "0",
|
| 69 |
+
"dataType" : "Float32",
|
| 70 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 256)",
|
| 71 |
+
"shortDescription" : "",
|
| 72 |
+
"shape" : "[1, 1, 256]",
|
| 73 |
+
"name" : "noise_init",
|
| 74 |
+
"type" : "MultiArray"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"hasShapeFlexibility" : "0",
|
| 78 |
+
"isOptional" : "0",
|
| 79 |
+
"dataType" : "Float32",
|
| 80 |
+
"formattedType" : "MultiArray (Float32 4 × 1 × 1 × 256)",
|
| 81 |
+
"shortDescription" : "",
|
| 82 |
+
"shape" : "[4, 1, 1, 256]",
|
| 83 |
+
"name" : "noises_aux",
|
| 84 |
+
"type" : "MultiArray"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"hasShapeFlexibility" : "0",
|
| 88 |
+
"isOptional" : "0",
|
| 89 |
+
"dataType" : "Float32",
|
| 90 |
+
"formattedType" : "MultiArray (Float32 1 × 256 × 768)",
|
| 91 |
+
"shortDescription" : "",
|
| 92 |
+
"shape" : "[1, 256, 768]",
|
| 93 |
+
"name" : "embedding",
|
| 94 |
+
"type" : "MultiArray"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"hasShapeFlexibility" : "0",
|
| 98 |
+
"isOptional" : "0",
|
| 99 |
+
"dataType" : "Float32",
|
| 100 |
+
"formattedType" : "MultiArray (Float32 1 × 256)",
|
| 101 |
+
"shortDescription" : "",
|
| 102 |
+
"shape" : "[1, 256]",
|
| 103 |
+
"name" : "features",
|
| 104 |
+
"type" : "MultiArray"
|
| 105 |
+
}
|
| 106 |
+
],
|
| 107 |
+
"generatedClassName" : "fused_diffusion_sampler_fp16_t256",
|
| 108 |
+
"method" : "predict"
|
| 109 |
+
}
|
| 110 |
+
]
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/model.mil
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
|
| 3 |
+
size 49361856
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:554dd7f0ac5139918b06d99c98e202a58621a5783502a2c6585851ae475ca47e
|
| 3 |
+
size 243
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f8eb46b781de148bb4eb0c62576773d51dddbf26741f916f59d5ae2b8778795
|
| 3 |
+
size 461
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 256)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 1, 256]",
|
| 13 |
+
"name" : "var_6225",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"modelParameters" : [
|
| 18 |
+
|
| 19 |
+
],
|
| 20 |
+
"specificationVersion" : 9,
|
| 21 |
+
"mlProgramOperationTypeHistogram" : {
|
| 22 |
+
"Ios18.expandDims" : 16,
|
| 23 |
+
"Ios18.softmax" : 24,
|
| 24 |
+
"Ios18.mul" : 117,
|
| 25 |
+
"Ios18.matmul" : 48,
|
| 26 |
+
"Ios16.reduceMean" : 8,
|
| 27 |
+
"Split" : 72,
|
| 28 |
+
"Tile" : 16,
|
| 29 |
+
"Ios18.add" : 188,
|
| 30 |
+
"Ios16.reduceSum" : 8,
|
| 31 |
+
"Ios18.layerNorm" : 24,
|
| 32 |
+
"Ios18.reshape" : 102,
|
| 33 |
+
"Ios18.linear" : 143,
|
| 34 |
+
"Ios18.conv" : 8,
|
| 35 |
+
"Ios18.gelu" : 41,
|
| 36 |
+
"Ios18.sub" : 8,
|
| 37 |
+
"Ios18.concat" : 8,
|
| 38 |
+
"Stack" : 8,
|
| 39 |
+
"Ios18.transpose" : 216,
|
| 40 |
+
"Ios18.cast" : 4,
|
| 41 |
+
"Ios18.sliceByIndex" : 4
|
| 42 |
+
},
|
| 43 |
+
"computePrecision" : "Mixed (Float16, Int32)",
|
| 44 |
+
"isUpdatable" : "0",
|
| 45 |
+
"stateSchema" : [
|
| 46 |
+
|
| 47 |
+
],
|
| 48 |
+
"availability" : {
|
| 49 |
+
"macOS" : "15.0",
|
| 50 |
+
"tvOS" : "18.0",
|
| 51 |
+
"visionOS" : "2.0",
|
| 52 |
+
"watchOS" : "11.0",
|
| 53 |
+
"iOS" : "18.0",
|
| 54 |
+
"macCatalyst" : "18.0"
|
| 55 |
+
},
|
| 56 |
+
"modelType" : {
|
| 57 |
+
"name" : "MLModelType_mlProgram"
|
| 58 |
+
},
|
| 59 |
+
"userDefinedMetadata" : {
|
| 60 |
+
"com.github.apple.coremltools.conversion_date" : "2026-05-08",
|
| 61 |
+
"com.github.apple.coremltools.source" : "torch==2.11.0",
|
| 62 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 63 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 64 |
+
},
|
| 65 |
+
"inputSchema" : [
|
| 66 |
+
{
|
| 67 |
+
"hasShapeFlexibility" : "0",
|
| 68 |
+
"isOptional" : "0",
|
| 69 |
+
"dataType" : "Float32",
|
| 70 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 256)",
|
| 71 |
+
"shortDescription" : "",
|
| 72 |
+
"shape" : "[1, 1, 256]",
|
| 73 |
+
"name" : "noise_init",
|
| 74 |
+
"type" : "MultiArray"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"hasShapeFlexibility" : "0",
|
| 78 |
+
"isOptional" : "0",
|
| 79 |
+
"dataType" : "Float32",
|
| 80 |
+
"formattedType" : "MultiArray (Float32 4 × 1 × 1 × 256)",
|
| 81 |
+
"shortDescription" : "",
|
| 82 |
+
"shape" : "[4, 1, 1, 256]",
|
| 83 |
+
"name" : "noises_aux",
|
| 84 |
+
"type" : "MultiArray"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"hasShapeFlexibility" : "0",
|
| 88 |
+
"isOptional" : "0",
|
| 89 |
+
"dataType" : "Float32",
|
| 90 |
+
"formattedType" : "MultiArray (Float32 1 × 64 × 768)",
|
| 91 |
+
"shortDescription" : "",
|
| 92 |
+
"shape" : "[1, 64, 768]",
|
| 93 |
+
"name" : "embedding",
|
| 94 |
+
"type" : "MultiArray"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"hasShapeFlexibility" : "0",
|
| 98 |
+
"isOptional" : "0",
|
| 99 |
+
"dataType" : "Float32",
|
| 100 |
+
"formattedType" : "MultiArray (Float32 1 × 256)",
|
| 101 |
+
"shortDescription" : "",
|
| 102 |
+
"shape" : "[1, 256]",
|
| 103 |
+
"name" : "features",
|
| 104 |
+
"type" : "MultiArray"
|
| 105 |
+
}
|
| 106 |
+
],
|
| 107 |
+
"generatedClassName" : "fused_diffusion_sampler_fp16_t64",
|
| 108 |
+
"method" : "predict"
|
| 109 |
+
}
|
| 110 |
+
]
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/model.mil
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
|
| 3 |
+
size 49361856
|
iteration_3/packages/.DS_Store
CHANGED
|
Binary files a/iteration_3/packages/.DS_Store and b/iteration_3/packages/.DS_Store differ
|
|
|
iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86a52d9bd4b18a4a30aeb17118ca75cd57c729e45db4f4555f9b9745d33fa48a
|
| 3 |
+
size 85782
|
iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ff3ca8fac0332427ddfe5e78954382359d26516284113001a7484b60455eb10
|
| 3 |
+
size 12126848
|
iteration_3/packages/bert_fp16_t128.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"8196AB77-8508-4F1F-8211-7AC9A128F5E9": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"CBF6095A-75FA-4003-8CB6-EB68D8C98F8F": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "CBF6095A-75FA-4003-8CB6-EB68D8C98F8F"
|
| 18 |
+
}
|
iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96c9d6993382797abc86b9e8fae92f3af3a1cf0dca68c77eedbe5842f7f56706
|
| 3 |
+
size 85782
|
iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7569b53a68b9664e246fda171851daa1dd5f01f64aa31533dfcaf40f4034fee3
|
| 3 |
+
size 12192384
|
iteration_3/packages/bert_fp16_t256.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"0DB32ECF-03B7-442F-883B-BAD01666A6EB": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Specification",
|
| 7 |
+
"name": "model.mlmodel",
|
| 8 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 9 |
+
},
|
| 10 |
+
"D3449315-4391-4FD3-A386-218E0755ACD4": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Weights",
|
| 13 |
+
"name": "weights",
|
| 14 |
+
"path": "com.apple.CoreML/weights"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "0DB32ECF-03B7-442F-883B-BAD01666A6EB"
|
| 18 |
+
}
|
iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ce83515fe05aeb2fab52063ca9a04aa700254ca393d0202e259ab3f60c0b99c
|
| 3 |
+
size 85458
|
iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:629b75e73fbceeb89e92b0b85548bde59918208b424b8d6467202d72d82629b2
|
| 3 |
+
size 12094080
|
iteration_3/packages/bert_fp16_t64.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"7919D85D-6DA9-40DE-8836-373256963D46": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"C58FFFDE-2C63-420D-A908-5E1FD94373FB": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "C58FFFDE-2C63-420D-A908-5E1FD94373FB"
|
| 18 |
+
}
|
iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec9d3ede7bc94cf7ea72a865f1997159b7916d7549e9639b08a50ecb0ccee874
|
| 3 |
+
size 312430
|
iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
|
| 3 |
+
size 49361856
|
iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"0AD329AC-53BC-4290-912D-058CBA5BA5D7": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Specification",
|
| 7 |
+
"name": "model.mlmodel",
|
| 8 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 9 |
+
},
|
| 10 |
+
"9C5F32F4-A05E-4BF3-9782-D67784A8D9D5": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Weights",
|
| 13 |
+
"name": "weights",
|
| 14 |
+
"path": "com.apple.CoreML/weights"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "0AD329AC-53BC-4290-912D-058CBA5BA5D7"
|
| 18 |
+
}
|
iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab926a4ef86b086e32276632d7b6232c2b8b31566641a82a9cf5d51dfa92431f
|
| 3 |
+
size 312430
|
iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
|
| 3 |
+
size 49361856
|
iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"09308FB4-D32A-41FF-B79D-FAA53069F9CC": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Specification",
|
| 7 |
+
"name": "model.mlmodel",
|
| 8 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 9 |
+
},
|
| 10 |
+
"76E947D9-F207-4B3D-93D4-708641940DCF": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Weights",
|
| 13 |
+
"name": "weights",
|
| 14 |
+
"path": "com.apple.CoreML/weights"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "09308FB4-D32A-41FF-B79D-FAA53069F9CC"
|
| 18 |
+
}
|
iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8aed8b7b3017488f499400131aa62942687ab060d5a2bf628e1fc9054b50569
|
| 3 |
+
size 311547
|
iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
|
| 3 |
+
size 49361856
|