diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..39b59f06acad9da73b56f5eb2ed6af87e59b016a 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,61 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text
+subscription/macos_ios/engines/tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/subscription/macos_ios/engines/llm_config.json b/subscription/macos_ios/engines/llm_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..39b555443e09d2c7138ed54cbf53dd99f9ff7477
--- /dev/null
+++ b/subscription/macos_ios/engines/llm_config.json
@@ -0,0 +1,22 @@
+{
+ "hidden_size": 1536,
+ "intermediate_size": 8960,
+ "num_attention_heads": 12,
+ "num_key_value_heads": 2,
+ "num_hidden_layers": 28,
+ "vocab_size": 151936,
+ "max_position_embeddings": 32768,
+ "head_dim": null,
+ "attention_bias": false,
+ "mlp_bias": false,
+ "rope_scaling": {
+ "rope_theta": 1000000.0,
+ "rope_type": "default"
+ },
+ "original_max_position_embeddings": null,
+ "partial_rotary_factor": 1.0,
+ "tie_word_embeddings": true,
+ "rms_norm_eps": 1e-06,
+ "attention_dropout": 0.0,
+ "rope_theta": 10000.0
+}
\ No newline at end of file
diff --git a/subscription/macos_ios/engines/llm_runtime.json b/subscription/macos_ios/engines/llm_runtime.json
new file mode 100644
index 0000000000000000000000000000000000000000..d885973ab924267cce016f00af24c83855fe0e2d
--- /dev/null
+++ b/subscription/macos_ios/engines/llm_runtime.json
@@ -0,0 +1,10 @@
+{
+ "prefill_shapes": [
+ 128,
+ 256,
+ 512
+ ],
+ "decode_shape": 1,
+ "rope_inside_pre_attn": false,
+ "compute_dtype": "bfloat16"
+}
\ No newline at end of file
diff --git a/subscription/macos_ios/engines/mx_modules/final_norm.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/final_norm.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..0da0b382eddacdf52f7227c22b410834fce0713b
Binary files /dev/null and b/subscription/macos_ios/engines/mx_modules/final_norm.qlipmx.qlip differ
diff --git a/subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..2d451a760045c2966737340bbf8812c0da80f2e4
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:798fdfbbee8152fedfc0fcf7bc4beb43d4b440fc5ee1bee89c99940f3fe091a6
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..cdc5cd2632ad91089a1229991a5d0c78c57dce21
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47e02a1e00725ef189d8a03b688011769920a91515f789bc4286426d79c69b9d
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..3d3eff0e91273d016c89bb025fb04a0da8cc1758
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17ff08b92bf606029547bc924a6dc89ff3d88b47a07dc715860d351af15693f8
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..159097609cc01798d8651de7b956511ac460c313
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b76f6255f4e37b938a4a90e14a2f75be7fa4cdfaa707bc271a7f26a404bf269
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..a3dda443062c3b003df8dab75bd25bbeaa001f10
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:798159223e39a953c3f347764d56b6c786a8aa97d2432f0269d7e2daf4fd25b3
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..c2452c3d2e791fa5e58dd6b5b620d78bdd6f54d0
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f84bc71279f907a8af27dcf04e8e258f5dc8ef280c9cb4af6a831cb8ca0fd21f
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..1c8914318072d0e879a86c7e83134050cdaacc1a
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c28c00843c7463c3cbd7c7e61c650e857e477b53696342bca7f709e3ca3e321
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..f557a9c5dd4508f7442b1e865a2f4a0392a5f8ef
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41c78b9c82e7037a03aef1809ce0ec4e5aa5c46bd4f40a63e0425e18b1fdb88f
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..42690dc1aaf8a80ecafd351e816cd7c9e19c75d5
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a3d79be3c04e2cbfcccd6617f87e6518c33ac057dd5cb1dbe6956ef3f04f3d3
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..4e6def16f3e7f3b7428422420ae36e4a4debc863
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc5eb719a9d850eddd31d0058c729da4d2fed8e5e58e112475eff13edcb326bf
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..9e2cc79ffdc7e388944ce00c97383edec7da7609
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46756cc3d071cb2ebd50df5f2649fdbcec5c49ad952ffecc8b887884e07ab470
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..1d5b90029d8c1a0686e0ce90548b3c03763c6c1c
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:216eb444dcfaed9d1249bcf4ff9fa42e3dbefe9b84dc166551b8c2fba025df3f
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..7ca30d12f16ae693649ed77ffe605e5c237da1d0
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:064648d5dc008f9285eab25732fd753db0f86e90943508a368337b22025ea377
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..82291b855e2840018fa984dadde8928daa43a683
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e680fc0f4aad84044bccd6f784bee43023e29181775bde33bb2b542ef8e32ae8
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..f5db240327b84185a8ae888da68c8c03fbde8a2e
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acf6525b47b87007632725465a1a8ae947b20655a1b6826476ff1aab1deca59c
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..e089d4201d956c82b9cec30f60b789ea1d7740c1
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a53fb708429b019dad4c8afb87d81fa4e1add0a163f8ecb071d54781e4604b8
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..cfd4c6c05dea5758bfe2d2bc13f8c5b3b713bbcb
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6385e3ad67e1bf84e97977e631e2a2e08ee206af347a2ac6a71ba2673046e12
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..5970b088c4d3aa8893090ecb12e54b725747b6d9
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:656f4715711bb996097c21032c5f78ea958579c1761619d9103896bc0606f93c
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..0ebaf64e225c446d271837dde3fdf22bf4420dd5
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cae2fbb0887d2393998e543b6c4ba8d7952dd796afcb6155f6a3510dd531186
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..8faf490498cddec7e9d1660b874cc3756b9a4f32
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adba4b79ed09e317ac53c7783a5bf806184459ca0547315a3e0474efc7013b6a
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..05bd85b9447ebf97cf433e1cc134989034abbf6c
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25b4aba7f8ca4491f527d96a4c45e9e5993b70cf4b30ab93b0cf50bffa5be1eb
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..719dfdfd55ef064120d8fb8f6e23fbe165ee4180
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8517e71d0e0b01326fa8be6f7f6fddb6e5676b028b1c49cfdd093a9619a6a7e
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..e742da7076c09e46af439ea9637b42b56d771885
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a35b875153fe746e0eb8b323d4c65ebb1d2f81eb0ac37b7318edf0e22e07a7ec
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..2c37ce0cbcff4ac3d44f5a5158a53a4daa6c8d0d
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0785cf633dc54c5749247bf5d0469025e655b03f9171906187e921cc90e3c8d7
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..3495bc92e3f2b6b5fe5ee37d0f5b3aa7c510936f
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da33bad0c48a72aa2f43deb2ec3eaef0fcba5a6d2cf1644fde2876919abd2394
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..2bfa1b2ab2f4a5c2f4134426be259af7e894256d
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b55d7035a28a6f6d3aab5b15929d177cda90d8a1e92b8409ecedc3b7cc8ded2e
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..53edbadfd85f4ac137a2f72e6652b276e6be8bbb
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79f7b5349ab68101a5092c13c2675346c98064751fe1d8a0d001fad915537599
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..4ca2ad346fdb89cc0198bc441f76740fb1512939
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c17b44ddae11426ee4ae992aafc73c2c42009a1f6ed10da6b8f9d2f05a59657
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..76cf90897b9e2a55a79757005d8e4437dd7e93f1
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6be1a81eee815ea89a4bd154e2eceb07badf8c9de67cf3deb3a94aca7caebe
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..48947afc3054d1737a27d83a81da83053d94df16
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5842e760032baca8071ad29f5b37243b5ebc084f0b594b463818e3e7c4fa4748
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..1ff5075b58ed51daa3247ff4fe6a9b702494748e
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c019124f90c7588f67776273d95f949a0ee99a33ce2559b58ae999fcaefaa18
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..ecda94e542144d9642dea06dfc1290024191eb57
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc1e3a3fda333464d2db0c4a35a2e795503f2865a68bc122c39fd0190a13ab54
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..3a8d190e4bda2acd378add039d52426b93b64902
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a4cea172bfda5818243de6cbabff0636715b054e3931b8abd1a33ec9e0e3bf7
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..7cdef0ff575b68dc923dc1f56afb6278eaff84f4
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21e610206e2b572726ca300b5c1ae6f43452e47aca4162dca9c6c593b734b1da
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..80daf49725a07c3d9ee551320637b31cd4c3e104
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40c9fa3fa752177c4ce1cb4a775d319262f166c7fcfa77fdd02b198981c539a7
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..5380bfcf45ee0ece988e003d8d87ff4dbc9dd516
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:305daad87a3301b0d63ce403bfbb817b472aacd38725018209fb729330939882
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..0f737f12cad752a8d69a743bc94d40aa9133855e
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9bd7941e002cdf64f81957983f3cabb54befa84e3f8ae4aff16e4430462e641
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..8b2ee9a440b5381ede61e7212c287e6c339f4d8f
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbe16223e8f1888aef54015477920cfbd848359d897a7682b4e9a744d9d6f4dc
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..d475d9cee60164af230fe1c83744c80c772076c2
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd25673893790e5ed2a629acb755f55043b5ebe702427040a7601663737dba10
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..4cabd7b489f115d0de17d8773e647deb7b730f93
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9149cd4cb1bed5f5ad4f87717897238762ea98bad25d327a8c581704bc756aa2
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..7608d12609e9687e1ba4641e1abb08efdb399995
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b82f8dab6c53ca1f36f5c48fd35cd381eaa3098a95ddb573f3d6b7b3f1bfb766
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..14b32de7dfc165480aee1ea1c55e23d87225c095
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4886ff256155ff20eed23400be746699c7a08aefc7d084e60293416e98cef0e
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..28e7d46613fb95bb03272af8c818f4b584495e15
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e5f4ae75e61c0552ffcfdcdb6c58498208bddf182ba201893fd92260bf63daa
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..534138d399884b33f6e2beffaac4b47345d34af8
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2ce81ad749080bbbbc3bfd410f35273d92d0a870850783ef1a4774eb2e5f402
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..a49c68271440dd2a9b129ff337ead49f8c945867
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6fe4d7744b60c86086ab9bc9150b377cf7af9c2104a13c8188e1cbb4db42817
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..c3d5d3cf948624191f8b1f0d7b39310dde603df7
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e6ac7c52b2e81e054858b72277ed6ad3ecfdf07854f97e9394acb7c07abe299
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..a41e2d4e44552ec7ffe7ad07651c444727267cec
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:418abd8a861d7c8398075cc0f2467d5b047b24c10b1410a265f3f81ab897cfef
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..d9f0a69ee0da4dc5c232e1cef9da24fd34e45e9f
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f8bc12825bc86d4ce7f33c27bec4a62693e9b76228c978561612bb696914d21
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..38a1acdafdc495bc200277a94aed51ed3753dfb8
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4ba5f11808c8f7897cdd4e7b894cc3e5e874e8e4370e6b0c0e1ed160e3d79c0
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..82dcb6063dc87b6ab9d47c74cef9db54eb7a904c
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eacdedc5d8d56ba08447515c9863b0fcbd1a1bab667941d2d5f2f2c983dd01e9
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..653b2d8294df7a0d810dd52a4fd3d63951ad5e3f
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18f3e967103e3e18ba2c8604484c5cde37f2e0c79703e3cdcaaaf37221ac854e
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..8f63436c6ce73591e3fc23b5e20dff7c0e3687fe
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:015aa4a0236306a5e66ff570f96a6c1bb4a0066c876cde14fd14f936b1b9d468
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..37066a6ac68c343076bc91e12e48e04fff2fb00e
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:467eac4335da0d96c9b4744f0d86c64d3f551fe3ee2b3cfdee835d7ff4899275
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..f96e90153478f3d6a3645b49fe018dd3f2478974
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1f7d0e640193eead81c01c22163aa6e7618335b14e3b4f037c2fb2c2b1bab57
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..09d3d3b05650e2e28c4b6a523b338e65df964b49
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:428c395bbfc657976c7e99b0651385130959134d9ba71ccb32d3915a6939b76c
+size 27300628
diff --git a/subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..207db0b6d1741abf6df5504c8220be7970f7f45a
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:503ed9c381fbe97d6b5a40acb4770c658053490207dbf47989f7d780fbcf162c
+size 6318308
diff --git a/subscription/macos_ios/engines/mx_modules/metadata.json b/subscription/macos_ios/engines/mx_modules/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..790452729553c25c544c7e25fe5e66c00910d3e1
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/metadata.json
@@ -0,0 +1,1699 @@
+{
+ "mlx_version": "0.30.6",
+ "model_version": "1.0.0",
+ "export_format_version": "1",
+ "created_at": "2026-03-11T16:36:58.846300+00:00",
+ "modules": {
+ "token_embedding": {
+ "path": "token_embedding.qlipmx.qlip",
+ "input_names": [
+ "token_or_hidden"
+ ],
+ "output_names": [
+ "output_0"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "token_or_hidden": {
+ "shape": [
+ 1,
+ 1
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "final_norm": {
+ "path": "final_norm.qlipmx.qlip",
+ "input_names": [
+ "hidden_states"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 1,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_0_pre_attention": {
+ "path": "layer_0_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_0_post_attention": {
+ "path": "layer_0_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_1_pre_attention": {
+ "path": "layer_1_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_1_post_attention": {
+ "path": "layer_1_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_2_pre_attention": {
+ "path": "layer_2_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_2_post_attention": {
+ "path": "layer_2_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_3_pre_attention": {
+ "path": "layer_3_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_3_post_attention": {
+ "path": "layer_3_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_4_pre_attention": {
+ "path": "layer_4_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_4_post_attention": {
+ "path": "layer_4_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_5_pre_attention": {
+ "path": "layer_5_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_5_post_attention": {
+ "path": "layer_5_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_6_pre_attention": {
+ "path": "layer_6_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_6_post_attention": {
+ "path": "layer_6_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_7_pre_attention": {
+ "path": "layer_7_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_7_post_attention": {
+ "path": "layer_7_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_8_pre_attention": {
+ "path": "layer_8_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_8_post_attention": {
+ "path": "layer_8_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_9_pre_attention": {
+ "path": "layer_9_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_9_post_attention": {
+ "path": "layer_9_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_10_pre_attention": {
+ "path": "layer_10_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_10_post_attention": {
+ "path": "layer_10_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_11_pre_attention": {
+ "path": "layer_11_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_11_post_attention": {
+ "path": "layer_11_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_12_pre_attention": {
+ "path": "layer_12_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_12_post_attention": {
+ "path": "layer_12_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_13_pre_attention": {
+ "path": "layer_13_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_13_post_attention": {
+ "path": "layer_13_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_14_pre_attention": {
+ "path": "layer_14_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_14_post_attention": {
+ "path": "layer_14_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_15_pre_attention": {
+ "path": "layer_15_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_15_post_attention": {
+ "path": "layer_15_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_16_pre_attention": {
+ "path": "layer_16_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_16_post_attention": {
+ "path": "layer_16_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_17_pre_attention": {
+ "path": "layer_17_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_17_post_attention": {
+ "path": "layer_17_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_18_pre_attention": {
+ "path": "layer_18_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_18_post_attention": {
+ "path": "layer_18_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_19_pre_attention": {
+ "path": "layer_19_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_19_post_attention": {
+ "path": "layer_19_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_20_pre_attention": {
+ "path": "layer_20_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_20_post_attention": {
+ "path": "layer_20_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_21_pre_attention": {
+ "path": "layer_21_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_21_post_attention": {
+ "path": "layer_21_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_22_pre_attention": {
+ "path": "layer_22_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_22_post_attention": {
+ "path": "layer_22_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_23_pre_attention": {
+ "path": "layer_23_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_23_post_attention": {
+ "path": "layer_23_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_24_pre_attention": {
+ "path": "layer_24_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_24_post_attention": {
+ "path": "layer_24_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_25_pre_attention": {
+ "path": "layer_25_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_25_post_attention": {
+ "path": "layer_25_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_26_pre_attention": {
+ "path": "layer_26_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_26_post_attention": {
+ "path": "layer_26_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ },
+ "layer_27_pre_attention": {
+ "path": "layer_27_pre_attention.qlipmx.qlip",
+ "input_names": [
+ "hidden_states",
+ "position_ids"
+ ],
+ "output_names": [
+ "q",
+ "k",
+ "v"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "hidden_states": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "position_ids": {
+ "shape": [
+ 1,
+ 128
+ ],
+ "dtype": "mlx.core.int32"
+ }
+ }
+ },
+ "layer_27_post_attention": {
+ "path": "layer_27_post_attention.qlipmx.qlip",
+ "input_names": [
+ "attn_output",
+ "residual"
+ ],
+ "output_names": [
+ "hidden_states"
+ ],
+ "eval_dtype": "bfloat16",
+ "inputs": {
+ "attn_output": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ },
+ "residual": {
+ "shape": [
+ 1,
+ 128,
+ 1536
+ ],
+ "dtype": "mlx.core.bfloat16"
+ }
+ }
+ }
+ }
+}
diff --git a/subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip
new file mode 100644
index 0000000000000000000000000000000000000000..468d35ebd845a467aff97bbb12c3a63e4cbcabfa
--- /dev/null
+++ b/subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35f43245d43466f0807c3d18dc218a8c43eb7de89fb6a4a1fde042ada60b9bb9
+size 247978508
diff --git a/subscription/macos_ios/engines/qlip_key.bin b/subscription/macos_ios/engines/qlip_key.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17e783a1d2c6e3e5ece8cd05c491f75137999c38
--- /dev/null
+++ b/subscription/macos_ios/engines/qlip_key.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9a716bcdb001176286bececcef720f6340090a4aebda89affb581cf8939f88c
+size 32
diff --git a/subscription/macos_ios/engines/tokenizer/chat_template.jinja b/subscription/macos_ios/engines/tokenizer/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..bdf7919a96cfe43d50914a007b9c0877bd0ec27e
--- /dev/null
+++ b/subscription/macos_ios/engines/tokenizer/chat_template.jinja
@@ -0,0 +1,54 @@
+{%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0]['role'] == 'system' %}
+ {{- messages[0]['content'] }}
+ {%- else %}
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
+ {%- endif %}
+    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+ {%- if messages[0]['role'] == 'system' %}
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+ {%- else %}
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- for message in messages %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {{- '<|im_start|>' + message.role }}
+ {%- if message.content %}
+ {{- '\n' + message.content }}
+ {%- endif %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+            {{- '\n<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {{- tool_call.arguments | tojson }}
+            {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+        {{- '\n<tool_response>\n' }}
+ {{- message.content }}
+        {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+{%- endif %}
diff --git a/subscription/macos_ios/engines/tokenizer/tokenizer.json b/subscription/macos_ios/engines/tokenizer/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c
--- /dev/null
+++ b/subscription/macos_ios/engines/tokenizer/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8
+size 11421892
diff --git a/subscription/macos_ios/engines/tokenizer/tokenizer_config.json b/subscription/macos_ios/engines/tokenizer/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d75d3bb5300d205e48769cc1999073ab5971214
--- /dev/null
+++ b/subscription/macos_ios/engines/tokenizer/tokenizer_config.json
@@ -0,0 +1,29 @@
+{
+ "add_prefix_space": false,
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "is_local": false,
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}