diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..39b59f06acad9da73b56f5eb2ed6af87e59b016a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,61 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip filter=lfs diff=lfs merge=lfs -text +subscription/macos_ios/engines/tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/subscription/macos_ios/engines/llm_config.json b/subscription/macos_ios/engines/llm_config.json new file mode 100644 index 0000000000000000000000000000000000000000..39b555443e09d2c7138ed54cbf53dd99f9ff7477 --- /dev/null +++ b/subscription/macos_ios/engines/llm_config.json @@ -0,0 +1,22 @@ +{ + "hidden_size": 1536, + "intermediate_size": 8960, + "num_attention_heads": 12, + "num_key_value_heads": 2, + "num_hidden_layers": 28, + "vocab_size": 151936, + "max_position_embeddings": 32768, + "head_dim": null, + "attention_bias": false, + "mlp_bias": false, + "rope_scaling": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "original_max_position_embeddings": null, + "partial_rotary_factor": 1.0, + "tie_word_embeddings": true, + "rms_norm_eps": 1e-06, + "attention_dropout": 0.0, + "rope_theta": 10000.0 +} \ No newline at end of file diff --git a/subscription/macos_ios/engines/llm_runtime.json b/subscription/macos_ios/engines/llm_runtime.json new file mode 100644 index 0000000000000000000000000000000000000000..d885973ab924267cce016f00af24c83855fe0e2d --- /dev/null +++ b/subscription/macos_ios/engines/llm_runtime.json @@ -0,0 +1,10 @@ +{ + "prefill_shapes": [ + 128, + 256, + 512 + ], + "decode_shape": 1, + "rope_inside_pre_attn": false, + "compute_dtype": "bfloat16" +} \ No newline at end of file diff --git a/subscription/macos_ios/engines/mx_modules/final_norm.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/final_norm.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..0da0b382eddacdf52f7227c22b410834fce0713b Binary files /dev/null and b/subscription/macos_ios/engines/mx_modules/final_norm.qlipmx.qlip differ diff --git a/subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..2d451a760045c2966737340bbf8812c0da80f2e4 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_0_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:798fdfbbee8152fedfc0fcf7bc4beb43d4b440fc5ee1bee89c99940f3fe091a6 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..cdc5cd2632ad91089a1229991a5d0c78c57dce21 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_0_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e02a1e00725ef189d8a03b688011769920a91515f789bc4286426d79c69b9d +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..3d3eff0e91273d016c89bb025fb04a0da8cc1758 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_10_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ff08b92bf606029547bc924a6dc89ff3d88b47a07dc715860d351af15693f8 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..159097609cc01798d8651de7b956511ac460c313 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_10_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b76f6255f4e37b938a4a90e14a2f75be7fa4cdfaa707bc271a7f26a404bf269 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..a3dda443062c3b003df8dab75bd25bbeaa001f10 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_11_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:798159223e39a953c3f347764d56b6c786a8aa97d2432f0269d7e2daf4fd25b3 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..c2452c3d2e791fa5e58dd6b5b620d78bdd6f54d0 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_11_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f84bc71279f907a8af27dcf04e8e258f5dc8ef280c9cb4af6a831cb8ca0fd21f +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..1c8914318072d0e879a86c7e83134050cdaacc1a --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_12_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c28c00843c7463c3cbd7c7e61c650e857e477b53696342bca7f709e3ca3e321 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..f557a9c5dd4508f7442b1e865a2f4a0392a5f8ef --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_12_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c78b9c82e7037a03aef1809ce0ec4e5aa5c46bd4f40a63e0425e18b1fdb88f +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..42690dc1aaf8a80ecafd351e816cd7c9e19c75d5 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_13_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3d79be3c04e2cbfcccd6617f87e6518c33ac057dd5cb1dbe6956ef3f04f3d3 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..4e6def16f3e7f3b7428422420ae36e4a4debc863 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_13_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5eb719a9d850eddd31d0058c729da4d2fed8e5e58e112475eff13edcb326bf +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..9e2cc79ffdc7e388944ce00c97383edec7da7609 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_14_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46756cc3d071cb2ebd50df5f2649fdbcec5c49ad952ffecc8b887884e07ab470 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..1d5b90029d8c1a0686e0ce90548b3c03763c6c1c --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_14_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216eb444dcfaed9d1249bcf4ff9fa42e3dbefe9b84dc166551b8c2fba025df3f +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..7ca30d12f16ae693649ed77ffe605e5c237da1d0 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_15_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064648d5dc008f9285eab25732fd753db0f86e90943508a368337b22025ea377 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..82291b855e2840018fa984dadde8928daa43a683 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_15_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e680fc0f4aad84044bccd6f784bee43023e29181775bde33bb2b542ef8e32ae8 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..f5db240327b84185a8ae888da68c8c03fbde8a2e --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_16_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf6525b47b87007632725465a1a8ae947b20655a1b6826476ff1aab1deca59c +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..e089d4201d956c82b9cec30f60b789ea1d7740c1 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_16_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a53fb708429b019dad4c8afb87d81fa4e1add0a163f8ecb071d54781e4604b8 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..cfd4c6c05dea5758bfe2d2bc13f8c5b3b713bbcb --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_17_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6385e3ad67e1bf84e97977e631e2a2e08ee206af347a2ac6a71ba2673046e12 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..5970b088c4d3aa8893090ecb12e54b725747b6d9 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_17_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:656f4715711bb996097c21032c5f78ea958579c1761619d9103896bc0606f93c +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..0ebaf64e225c446d271837dde3fdf22bf4420dd5 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_18_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cae2fbb0887d2393998e543b6c4ba8d7952dd796afcb6155f6a3510dd531186 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..8faf490498cddec7e9d1660b874cc3756b9a4f32 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_18_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adba4b79ed09e317ac53c7783a5bf806184459ca0547315a3e0474efc7013b6a +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..05bd85b9447ebf97cf433e1cc134989034abbf6c --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_19_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b4aba7f8ca4491f527d96a4c45e9e5993b70cf4b30ab93b0cf50bffa5be1eb +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..719dfdfd55ef064120d8fb8f6e23fbe165ee4180 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_19_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8517e71d0e0b01326fa8be6f7f6fddb6e5676b028b1c49cfdd093a9619a6a7e +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..e742da7076c09e46af439ea9637b42b56d771885 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_1_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35b875153fe746e0eb8b323d4c65ebb1d2f81eb0ac37b7318edf0e22e07a7ec +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..2c37ce0cbcff4ac3d44f5a5158a53a4daa6c8d0d --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_1_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0785cf633dc54c5749247bf5d0469025e655b03f9171906187e921cc90e3c8d7 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..3495bc92e3f2b6b5fe5ee37d0f5b3aa7c510936f --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_20_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da33bad0c48a72aa2f43deb2ec3eaef0fcba5a6d2cf1644fde2876919abd2394 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..2bfa1b2ab2f4a5c2f4134426be259af7e894256d --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_20_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b55d7035a28a6f6d3aab5b15929d177cda90d8a1e92b8409ecedc3b7cc8ded2e +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..53edbadfd85f4ac137a2f72e6652b276e6be8bbb --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_21_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f7b5349ab68101a5092c13c2675346c98064751fe1d8a0d001fad915537599 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..4ca2ad346fdb89cc0198bc441f76740fb1512939 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_21_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c17b44ddae11426ee4ae992aafc73c2c42009a1f6ed10da6b8f9d2f05a59657 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..76cf90897b9e2a55a79757005d8e4437dd7e93f1 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_22_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d6be1a81eee815ea89a4bd154e2eceb07badf8c9de67cf3deb3a94aca7caebe +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..48947afc3054d1737a27d83a81da83053d94df16 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_22_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5842e760032baca8071ad29f5b37243b5ebc084f0b594b463818e3e7c4fa4748 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..1ff5075b58ed51daa3247ff4fe6a9b702494748e --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_23_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c019124f90c7588f67776273d95f949a0ee99a33ce2559b58ae999fcaefaa18 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..ecda94e542144d9642dea06dfc1290024191eb57 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_23_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc1e3a3fda333464d2db0c4a35a2e795503f2865a68bc122c39fd0190a13ab54 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..3a8d190e4bda2acd378add039d52426b93b64902 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_24_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4cea172bfda5818243de6cbabff0636715b054e3931b8abd1a33ec9e0e3bf7 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..7cdef0ff575b68dc923dc1f56afb6278eaff84f4 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_24_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e610206e2b572726ca300b5c1ae6f43452e47aca4162dca9c6c593b734b1da +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..80daf49725a07c3d9ee551320637b31cd4c3e104 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_25_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c9fa3fa752177c4ce1cb4a775d319262f166c7fcfa77fdd02b198981c539a7 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..5380bfcf45ee0ece988e003d8d87ff4dbc9dd516 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_25_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305daad87a3301b0d63ce403bfbb817b472aacd38725018209fb729330939882 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..0f737f12cad752a8d69a743bc94d40aa9133855e --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_26_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9bd7941e002cdf64f81957983f3cabb54befa84e3f8ae4aff16e4430462e641 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..8b2ee9a440b5381ede61e7212c287e6c339f4d8f --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_26_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe16223e8f1888aef54015477920cfbd848359d897a7682b4e9a744d9d6f4dc +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..d475d9cee60164af230fe1c83744c80c772076c2 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_27_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd25673893790e5ed2a629acb755f55043b5ebe702427040a7601663737dba10 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..4cabd7b489f115d0de17d8773e647deb7b730f93 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_27_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9149cd4cb1bed5f5ad4f87717897238762ea98bad25d327a8c581704bc756aa2 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..7608d12609e9687e1ba4641e1abb08efdb399995 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_2_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82f8dab6c53ca1f36f5c48fd35cd381eaa3098a95ddb573f3d6b7b3f1bfb766 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..14b32de7dfc165480aee1ea1c55e23d87225c095 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_2_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4886ff256155ff20eed23400be746699c7a08aefc7d084e60293416e98cef0e +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..28e7d46613fb95bb03272af8c818f4b584495e15 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_3_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e5f4ae75e61c0552ffcfdcdb6c58498208bddf182ba201893fd92260bf63daa +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..534138d399884b33f6e2beffaac4b47345d34af8 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_3_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ce81ad749080bbbbc3bfd410f35273d92d0a870850783ef1a4774eb2e5f402 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..a49c68271440dd2a9b129ff337ead49f8c945867 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_4_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6fe4d7744b60c86086ab9bc9150b377cf7af9c2104a13c8188e1cbb4db42817 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..c3d5d3cf948624191f8b1f0d7b39310dde603df7 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_4_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6ac7c52b2e81e054858b72277ed6ad3ecfdf07854f97e9394acb7c07abe299 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..a41e2d4e44552ec7ffe7ad07651c444727267cec --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_5_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418abd8a861d7c8398075cc0f2467d5b047b24c10b1410a265f3f81ab897cfef +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..d9f0a69ee0da4dc5c232e1cef9da24fd34e45e9f --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_5_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8bc12825bc86d4ce7f33c27bec4a62693e9b76228c978561612bb696914d21 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..38a1acdafdc495bc200277a94aed51ed3753dfb8 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_6_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ba5f11808c8f7897cdd4e7b894cc3e5e874e8e4370e6b0c0e1ed160e3d79c0 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..82dcb6063dc87b6ab9d47c74cef9db54eb7a904c --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_6_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eacdedc5d8d56ba08447515c9863b0fcbd1a1bab667941d2d5f2f2c983dd01e9 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..653b2d8294df7a0d810dd52a4fd3d63951ad5e3f --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_7_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f3e967103e3e18ba2c8604484c5cde37f2e0c79703e3cdcaaaf37221ac854e +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..8f63436c6ce73591e3fc23b5e20dff7c0e3687fe --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_7_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015aa4a0236306a5e66ff570f96a6c1bb4a0066c876cde14fd14f936b1b9d468 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..37066a6ac68c343076bc91e12e48e04fff2fb00e --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_8_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467eac4335da0d96c9b4744f0d86c64d3f551fe3ee2b3cfdee835d7ff4899275 +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..f96e90153478f3d6a3645b49fe018dd3f2478974 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_8_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f7d0e640193eead81c01c22163aa6e7618335b14e3b4f037c2fb2c2b1bab57 +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..09d3d3b05650e2e28c4b6a523b338e65df964b49 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_9_post_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428c395bbfc657976c7e99b0651385130959134d9ba71ccb32d3915a6939b76c +size 27300628 diff --git a/subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..207db0b6d1741abf6df5504c8220be7970f7f45a --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/layer_9_pre_attention.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503ed9c381fbe97d6b5a40acb4770c658053490207dbf47989f7d780fbcf162c +size 6318308 diff --git a/subscription/macos_ios/engines/mx_modules/metadata.json b/subscription/macos_ios/engines/mx_modules/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..790452729553c25c544c7e25fe5e66c00910d3e1 --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/metadata.json @@ -0,0 +1,1699 @@ +{ + "mlx_version": "0.30.6", + "model_version": "1.0.0", + "export_format_version": "1", + "created_at": "2026-03-11T16:36:58.846300+00:00", + "modules": { + "token_embedding": { + "path": "token_embedding.qlipmx.qlip", + "input_names": [ + "token_or_hidden" + ], + "output_names": [ + "output_0" + ], + "eval_dtype": "bfloat16", + "inputs": { + "token_or_hidden": { + "shape": [ + 1, + 1 + ], + "dtype": "mlx.core.int32" + } + } + }, + "final_norm": { + "path": "final_norm.qlipmx.qlip", + "input_names": [ + "hidden_states" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 1, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_0_pre_attention": { + "path": "layer_0_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_0_post_attention": { + "path": "layer_0_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_1_pre_attention": { + "path": "layer_1_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_1_post_attention": { + "path": "layer_1_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_2_pre_attention": { + "path": "layer_2_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_2_post_attention": { + "path": "layer_2_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_3_pre_attention": { + "path": "layer_3_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_3_post_attention": { + "path": "layer_3_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_4_pre_attention": { + "path": "layer_4_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_4_post_attention": { + "path": "layer_4_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_5_pre_attention": { + "path": "layer_5_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_5_post_attention": { + "path": "layer_5_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_6_pre_attention": { + "path": "layer_6_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_6_post_attention": { + "path": "layer_6_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_7_pre_attention": { + "path": "layer_7_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_7_post_attention": { + "path": "layer_7_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_8_pre_attention": { + "path": "layer_8_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_8_post_attention": { + "path": "layer_8_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_9_pre_attention": { + "path": "layer_9_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_9_post_attention": { + "path": "layer_9_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_10_pre_attention": { + "path": "layer_10_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_10_post_attention": { + "path": "layer_10_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_11_pre_attention": { + "path": "layer_11_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_11_post_attention": { + "path": "layer_11_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_12_pre_attention": { + "path": "layer_12_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_12_post_attention": { + "path": "layer_12_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_13_pre_attention": { + "path": "layer_13_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_13_post_attention": { + "path": "layer_13_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_14_pre_attention": { + "path": "layer_14_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_14_post_attention": { + "path": "layer_14_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_15_pre_attention": { + "path": "layer_15_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_15_post_attention": { + "path": "layer_15_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_16_pre_attention": { + "path": "layer_16_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_16_post_attention": { + "path": "layer_16_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_17_pre_attention": { + "path": "layer_17_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_17_post_attention": { + "path": "layer_17_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_18_pre_attention": { + "path": "layer_18_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_18_post_attention": { + "path": "layer_18_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_19_pre_attention": { + "path": "layer_19_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_19_post_attention": { + "path": "layer_19_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_20_pre_attention": { + "path": "layer_20_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_20_post_attention": { + "path": "layer_20_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_21_pre_attention": { + "path": "layer_21_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_21_post_attention": { + "path": "layer_21_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_22_pre_attention": { + "path": "layer_22_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_22_post_attention": { + "path": "layer_22_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_23_pre_attention": { + "path": "layer_23_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_23_post_attention": { + "path": "layer_23_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_24_pre_attention": { + "path": "layer_24_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_24_post_attention": { + "path": "layer_24_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_25_pre_attention": { + "path": "layer_25_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_25_post_attention": { + "path": "layer_25_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_26_pre_attention": { + "path": "layer_26_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_26_post_attention": { + "path": "layer_26_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + }, + "layer_27_pre_attention": { + "path": "layer_27_pre_attention.qlipmx.qlip", + "input_names": [ + "hidden_states", + "position_ids" + ], + "output_names": [ + "q", + "k", + "v" + ], + "eval_dtype": "bfloat16", + "inputs": { + "hidden_states": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "position_ids": { + "shape": [ + 1, + 128 + ], + "dtype": "mlx.core.int32" + } + } + }, + "layer_27_post_attention": { + "path": "layer_27_post_attention.qlipmx.qlip", + "input_names": [ + "attn_output", + "residual" + ], + "output_names": [ + "hidden_states" + ], + "eval_dtype": "bfloat16", + "inputs": { + "attn_output": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + }, + "residual": { + "shape": [ + 1, + 128, + 1536 + ], + "dtype": "mlx.core.bfloat16" + } + } + } + } +} diff --git a/subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip b/subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip new file mode 100644 index 0000000000000000000000000000000000000000..468d35ebd845a467aff97bbb12c3a63e4cbcabfa --- /dev/null +++ b/subscription/macos_ios/engines/mx_modules/token_embedding.qlipmx.qlip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f43245d43466f0807c3d18dc218a8c43eb7de89fb6a4a1fde042ada60b9bb9 +size 247978508 diff --git a/subscription/macos_ios/engines/qlip_key.bin b/subscription/macos_ios/engines/qlip_key.bin new file mode 100644 index 0000000000000000000000000000000000000000..17e783a1d2c6e3e5ece8cd05c491f75137999c38 --- /dev/null +++ b/subscription/macos_ios/engines/qlip_key.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a716bcdb001176286bececcef720f6340090a4aebda89affb581cf8939f88c +size 32 diff --git a/subscription/macos_ios/engines/tokenizer/chat_template.jinja b/subscription/macos_ios/engines/tokenizer/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..bdf7919a96cfe43d50914a007b9c0877bd0ec27e --- /dev/null +++ b/subscription/macos_ios/engines/tokenizer/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/subscription/macos_ios/engines/tokenizer/tokenizer.json b/subscription/macos_ios/engines/tokenizer/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/subscription/macos_ios/engines/tokenizer/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/subscription/macos_ios/engines/tokenizer/tokenizer_config.json b/subscription/macos_ios/engines/tokenizer/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d75d3bb5300d205e48769cc1999073ab5971214 --- /dev/null +++ b/subscription/macos_ios/engines/tokenizer/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +}