diff --git a/.gitattributes b/.gitattributes index 56095c5d5a036125dbbb8147e7ccf2e3089beed9..0730de8543c876ba0b7aa2def195d8c7b312ab08 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3192,3 +3192,43 @@ qwen3-4b-log-unary/model_layers_0_mlp_up_proj_weight.sign filter=lfs diff=lfs me qwen3-4b-log-unary/model_layers_7_self_attn_v_proj_weight.sign filter=lfs diff=lfs merge=lfs -text qwen3-4b-log-unary/model_layers_16_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_9_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_26_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_10_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_19_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_16_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_5_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_18_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_10_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_12_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_32_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_7_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_26_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_8_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_6_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_25_self_attn_k_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_24_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_0_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_15_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_35_self_attn_v_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_6_self_attn_k_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_16_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_29_self_attn_v_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_14_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_30_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_25_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_20_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_3_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_22_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_3_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_33_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_32_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_35_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_18_self_attn_v_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_12_self_attn_k_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_28_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.sign filter=lfs diff=lfs merge=lfs -text diff --git a/qwen3-4b-log-unary/model_layers_0_mlp_down_proj_weight.planes b/qwen3-4b-log-unary/model_layers_0_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..fb6af119da3df56be18a50039b1efd1c0d8c344c --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_0_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6ebcfd51c12e9664e7784520c5184bee93d40a5546b681c18dc405087698a8 +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_10_mlp_up_proj_weight.planes b/qwen3-4b-log-unary/model_layers_10_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..6df79c73b19c6c5c2b83dfef1bf94dfd946b9187 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_10_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afcb6e1c532ade2d6a70c79e32aff5fa55fdd09fa4df89cbf51dfd064b18dd2c +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_10_self_attn_q_proj_weight.sign b/qwen3-4b-log-unary/model_layers_10_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..0a0ed19e1c793b0338ec087fe13f441ba9d7cc27 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_10_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd8f9b0415810076820dc5ec64d617b2204c3824b2c345bfe767b48920988b2 +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.sign b/qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..014d07bdc3f7890a603b23b6355886c905e84f44 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac8dbec133671faebe9c6fa3c3eeafdba42732d7d5333a29894bd85b8b30f170 +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_12_mlp_gate_proj_weight.sign b/qwen3-4b-log-unary/model_layers_12_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..6a39fe659bf91b8af3f8d8228957b10c6e05cafc --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_12_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187dce5b2b27a837f1342c9bfef7ec2574c627eacc9e0a7c9248691b0bdba549 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_12_self_attn_k_proj_weight.sign b/qwen3-4b-log-unary/model_layers_12_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..1458ae84d563643a5f016d23879dcc667ca98c84 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_12_self_attn_k_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0552cda17b269b7b6d931c71f808d4a872f35af952c074919efa16b97c8efbe +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_14_mlp_gate_proj_weight.planes b/qwen3-4b-log-unary/model_layers_14_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..bdbd6a8ea8ccb746724e2ac1f10e63494aab192b --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_14_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bccb93e0b20d750585837937ee96c74eb3445a23add0e234ce3b1d5c43b1fa97 +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_15_mlp_gate_proj_weight.sign b/qwen3-4b-log-unary/model_layers_15_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..21ddb8f86eb8c1960714e815399f5c95a7236e21 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_15_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a3a22bf8ade1a8664ab305cd19454fdd5172e18ec3e4cc42253c3f251b1c5a1 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_16_mlp_gate_proj_weight.sign b/qwen3-4b-log-unary/model_layers_16_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..9bbb6e4ed9a57d8fefe9af2bcee1e79409f10f0d --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_16_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2334869db4457a5e1771f4e8e133315e264cfa8fcef081bf3e74e5bb486755a9 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_16_self_attn_o_proj_weight.planes b/qwen3-4b-log-unary/model_layers_16_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..419fe4aa9d5926298c55db2f91ce03fd44491b1b --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_16_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8804696a1c084cde3433f4e46fc019b22d2f4ea9cce716260ac6bd562ff8c07 +size 5242880 diff --git a/qwen3-4b-log-unary/model_layers_18_mlp_gate_proj_weight.planes b/qwen3-4b-log-unary/model_layers_18_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..5a01819d0a2bdf8edffa15bf2c990806c19769ec --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_18_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17dfc2e70f98f22122fe39207c56fc5e6d8a13d0801a1f19766d63734112c431 +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_18_self_attn_v_proj_weight.sign b/qwen3-4b-log-unary/model_layers_18_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..240f807c3debbf9e0acc317c284dc6dd54398729 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_18_self_attn_v_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e87436148d711d6ae4391be29e19f5e68654d209098e4e8d8a3da43392be0c9 +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_19_self_attn_k_proj_weight.planes b/qwen3-4b-log-unary/model_layers_19_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..cd7178f9833bb8c7f3569176fde7e7a5c71703f3 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_19_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c292b6095cbdfb1ad83d6cbf48e5b7ccc6dbef67d3ac53f25550a1d98d4f5e51 +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_20_self_attn_o_proj_weight.planes b/qwen3-4b-log-unary/model_layers_20_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..533d20f3b804ca2ecb3e8bb9a1a3189936d65dae --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_20_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb5a83217003aab411550fc4b42a6d94906dc4d08a4570b83d7891edc56515b +size 5242880 diff --git a/qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.sign b/qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..6138883895d1df220fc67bfc351b3415f2b3b602 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a1f7a008d419057c338922cc169e9c553beccda3331e0a34b17af8de185064 +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_22_mlp_up_proj_weight.sign b/qwen3-4b-log-unary/model_layers_22_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..d22edfb28303df520caec7ba0f4464868bff3a16 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_22_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8196215d1e4979cc12010dac3aaa9197436dfe11a2d723d5d2ebfce526c2fcae +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_24_mlp_up_proj_weight.sign b/qwen3-4b-log-unary/model_layers_24_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b680b0e893af6bd86e8083ae98d78eb3f85624b9 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_24_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d239accd26f1411d5b96b532b92b13e136c1b71b0383f1965dd0b1a91d45ef77 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_25_self_attn_k_proj_weight.sign b/qwen3-4b-log-unary/model_layers_25_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..30b5ae9ddc1a553f5eed084f952b1eb88cb9e860 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_25_self_attn_k_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2bc23efb2c9794853b4dcb832c209eb28fa690c26d6fb19c89bdad0cc8a53f8 +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_25_self_attn_q_proj_weight.sign b/qwen3-4b-log-unary/model_layers_25_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..10101d434ffb4d38a5d6b552fceb989a0ea90fdf --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_25_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3bb22d5874a3fbb0be5d53d39e94ca2d192266033aa419839312d164400afb +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_26_mlp_down_proj_weight.planes b/qwen3-4b-log-unary/model_layers_26_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..2cd9755c001e016c9fcc3f79a0204d64b8956e45 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_26_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b369605e7dada4b73e3b4c8d482d25907112caf43ca8571246022194ad53bbf +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_26_self_attn_o_proj_weight.planes b/qwen3-4b-log-unary/model_layers_26_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..3d1ec9d474b850978913344857058b2675ad06e1 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_26_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a71e981ac077961ed9092698aa0568003df71e3a45e08c9c2663f27a6b5efe +size 5242880 diff --git a/qwen3-4b-log-unary/model_layers_28_mlp_down_proj_weight.planes b/qwen3-4b-log-unary/model_layers_28_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..78290b16a38cca534fc416fdd7fe2e7003ab044e --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_28_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2308265dcaeec3c4ca55b299b4b8a464742674eef366b31f8a80ad72e2a15f +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.planes b/qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..8eae3e3cc473a2c96b3b23ddedd8022f46d2534d --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10db65fe1089b6e46286994b9f9343a00314cb084d309c379e4925724f10099d +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_29_self_attn_v_proj_weight.sign b/qwen3-4b-log-unary/model_layers_29_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..6c56fa128b50653840463d7795ed03e5345cf6dd --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_29_self_attn_v_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0bfe5bfdc32e818eb29056d2695a4205d1967a97a32ab2ca92eaef46d0088af +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.planes b/qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..25b7f0020847cdb23b6603fcdbab2ff578ef4a27 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b1b8163693eb624a5ab65fa860ded66135bf57bda54dcc0d5640486c003aabc +size 5242880 diff --git a/qwen3-4b-log-unary/model_layers_30_mlp_down_proj_weight.sign b/qwen3-4b-log-unary/model_layers_30_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..93901791d2639bd392cbb92a536f03f753e0ef08 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_30_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e63532dc97ad46321c93eb97292d22fdaf0e7fe2ce5d57215773ff5d5044cfd +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_32_mlp_down_proj_weight.sign b/qwen3-4b-log-unary/model_layers_32_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7b0d48825866632f4d48c9f2b1ddfeddf2b66902 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_32_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9c406c9d5464bbb91785e485aee7902bb8ffdbbc97c8b98ae0b09abddf16d7 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_32_mlp_up_proj_weight.sign b/qwen3-4b-log-unary/model_layers_32_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..05f572c48510a920ecdaf483cfd962c1f668a2dd --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_32_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2169cb5aa76cb8f67e10c3468390409a71ae354c3c6bb09fb29c830d53d0a11f +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_33_self_attn_q_proj_weight.sign b/qwen3-4b-log-unary/model_layers_33_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..e77606992597bb97a26892214e99120708da37f9 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_33_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187bf5c5b3e5983e28f86fc0ed154585add4f5f5ac16fafba07c34b30f8daf3f +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_35_mlp_up_proj_weight.planes b/qwen3-4b-log-unary/model_layers_35_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..ca0000cb7e99ea637b98f8038736ad4176fbfe65 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_35_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9ffbb1c69744d926f2625f057a53108fa6fbbb2a06e6a3be69654d499767fc +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_35_self_attn_v_proj_weight.sign b/qwen3-4b-log-unary/model_layers_35_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7ac3d953c0cd9e2fffa6a7f757ea616ff5dafd57 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_35_self_attn_v_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:050e344abece88e8240ee0b8d00465873656959d34ea4ae6a0fda734149968ff +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_3_mlp_down_proj_weight.sign b/qwen3-4b-log-unary/model_layers_3_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..3fb4b3f90717c913c3c7b85c761b0671dfbcfab1 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_3_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b824ccac8c88f208dc8574477cf9149a50bb74f8d5d060fc6bece4ab15b61963 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_3_self_attn_q_proj_weight.sign b/qwen3-4b-log-unary/model_layers_3_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ee5af0a10b956e0ae38d20f4e464b4663da69b87 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_3_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a98c6ed0dab01d444134169f1e5fb33e060a9ebc0a927113f7283813296a3f9 +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_5_mlp_gate_proj_weight.planes b/qwen3-4b-log-unary/model_layers_5_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..6d9af0b17865dd9545f5bc353a5342d5f7f8e588 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_5_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57efc7ed7fc19b8bfb426e89a7d01dd61dcc769f5a2bb9dcb4830af94caa7011 +size 12451840 diff --git a/qwen3-4b-log-unary/model_layers_6_self_attn_k_proj_weight.sign b/qwen3-4b-log-unary/model_layers_6_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4b4efa392b8d2696cafa7869ec090bda70f2dc53 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_6_self_attn_k_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f9ed2b48faabee7f312264c1a302a540f0da9d16a75371e0f612fa3621c884 +size 327680 diff --git a/qwen3-4b-log-unary/model_layers_6_self_attn_o_proj_weight.sign b/qwen3-4b-log-unary/model_layers_6_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b97c312a2a83b34bbe513832d6b6df7135093e9c --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_6_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d97c0815db5ac304702fd7022798c0e610dcc7f7827fe9e9ba1cc51dac13fa20 +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_7_self_attn_k_proj_weight.planes b/qwen3-4b-log-unary/model_layers_7_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..837830716452af0097963e7bd98b970ad7048aff --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_7_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7187d183cddd66358c0ac41ed1b27ce2ca80e818ae13d236a20cf2b94d40d19 +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_8_self_attn_q_proj_weight.sign b/qwen3-4b-log-unary/model_layers_8_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..02e1ef168ca1e691daecdb6fa815d24505aefe9c --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_8_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98b4b33e06f0c93a0c72763fad53f90c8fc8fd0d747b6cdabf79e2fb6ca92571 +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_9_mlp_up_proj_weight.sign b/qwen3-4b-log-unary/model_layers_9_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f29723be23a8d3dbbb85b12f5803a7a5c84e35c3 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_9_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc51de55e7ecbf74c3204d66801e622cbfd2df6af163d387ffcb7426394d3e9 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.planes b/qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..3eb3a9a34bce366f59df288da2bbdf20dc178289 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329445337b8f7adf239945384532f1955ac39c398e5b3a8227d25417a1ef36aa +size 1310720 diff --git a/qwen3-4b-thinking-unary/model_layers_11_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_11_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7471a25a42b831b5633547308681b3dce3261047 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_11_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_13_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_13_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1e65854a2f81885dd37f9149cb354e80d14344b9 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_13_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_15_self_attn_q_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_15_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..724ad34147a4055d8b0ca3ccae35af69e72fa596 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_15_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_16_mlp_up_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_16_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..677e6b612d23882c44276836d24b35621f707d95 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_16_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_16_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_16_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3f7fc6de8c7192a25396da102d4d91fbdf8c4757 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_16_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_17_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_17_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..30607ca8d38d55395d2661c0c11b87ba5431453e Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_17_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_19_input_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_19_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a67441406b0a13a3b88da5a65454f45ccfbafb53 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_19_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_19_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_19_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7a8031fbb290cd98f695f89a6d601f1b52c73bd6 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_19_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_1_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_1_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8b0da508f1e6a41ae223679d620165e2d1684cbf Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_1_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_21_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_21_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b41f4012eee72ebe776a37f0d2bc4152a5d12214 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_21_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_22_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_22_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..061682e0604a7d8014143c4cd29d498fc4746c58 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_22_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_22_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_22_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bf79f2cce2a101b93289b3940b324be0f938e209 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_22_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_25_input_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_25_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c3b883c85234fbfda9408486c19136177f7a8179 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_25_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_25_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_25_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9f3bb530010a8cb04fb25c8f3febad11a0489010 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_25_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_25_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_25_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..18e52463fdf7e75179c70ba57473f1df702304fb Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_25_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_26_post_attention_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_26_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8b5351749cde7cbe8e766144e43738c05214a7b9 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_26_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_26_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_26_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f2f53e6fccc870f5b53fc940d99a35bd577d8906 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_26_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_26_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_26_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..49b8bf1ac6b0c03599da0b9a5e32029ffd4cc1d6 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_26_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_29_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_29_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..427254e150941f1bbaf0ac148da9546cc371153a Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_29_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_29_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_29_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7caba9391b00e94bdde446190f1f53a06afcec05 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_29_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_2_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_2_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2d2b90d7504d1dc766ce4a905cdce93b82fdf7a6 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_2_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_30_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_30_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c2940d94f0e770bd352e87ef61e715e95cfc1ded Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_30_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_30_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_30_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..55d85b1085cad522ce1db381e8aee358462163db Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_30_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_31_mlp_down_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_31_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..899ccced1c1b98e6cfca1145a6327ca991e210a0 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_31_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_32_input_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_32_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6f84265fb39beb01bb0ef18d4e6512532f4169ea Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_32_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_33_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_33_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0148d37379217017120f24442125d664ba10fbb6 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_33_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_34_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_34_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1309afc6035eab8735d49f74b00e74d8cee22870 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_34_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_34_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_34_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2a17d5795dc3e6da1e75be9160a42f0deeb419e1 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_34_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_3_mlp_up_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_3_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f999ff99912e872f9b3bbf77ecfff81b08631974 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_3_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_4_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_4_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3286bae0b8ab5f93227ba349b9eb2de7a33c63bb Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_4_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_6_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_6_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..acbba0dcd52b4c8054a78a469d59495df2017c73 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_6_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_7_self_attn_q_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_7_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3b96c8c921f14b44298e503d46ee16392ab3e772 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_7_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_8_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_8_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..128450801eff49c63339396743dfdb6262885a03 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_8_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_9_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_9_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ed6eb308122b8f6fb0a442d1865b87ded5ad94bf Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_9_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/tokenizer_config.json b/qwen3-4b-thinking-unary/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8dec7d58dda385ce95b469aa2d277ec162168e58 --- /dev/null +++ b/qwen3-4b-thinking-unary/tokenizer_config.json @@ -0,0 +1,239 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 262144, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "add_bos_token": false +} \ No newline at end of file