diff --git a/early_exit_20250817_layers_5_kl0_25/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2457b6c6de83d3ba8738bf82961275f19ba89133 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07837de4aac568f367b773a9fa7c83f63f00f79a6d55285ede23a8cd33f7e90b +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_1000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_1000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c6a4ca40657ea68534d8178a676a5d43156b46a --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_1000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010178105f916551ad0e5d249749cd4f1803a83364379b32ee016abcb12fb621 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_1500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_1500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..767d2bbbd920aa0da5676162d67e81d664d6b124 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_1500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c6b229cea6a622adad4cd83d103ca4db4774600fa328fdb6e11c63d006f7b5 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_2000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_2000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9334f0f277a7546bb5e7756d06d3611090467a4 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_2000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a1bad52e342572ba7f37faacbbb6db8850d2804f115672f53ebb2bacae55118 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_2500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_25/step_2500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3cc00a04ed950e3113e419d288b894688ff67d3 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_2500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ce5b5621baa8bc69de1301911eb87994c562c78e7a870e957f14154a691096 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_25/step_2500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_2500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7bfd2bd96ca0caa2a83e192d16a4e0b97183029 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_2500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fca6ee0bfcb5f572d767cd95f743813ffa219b31e9ca7769cd597cdf90fb555 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_3000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_3000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aefd46a26ebb4dee40baebd513995df033a91d11 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_3000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ea8e8ac529c747fee3326a8bc3c0af36e9749fa7226346dbf2daf5d804abdc +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_3500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_3500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a01e47507dcb71dc0d9dbd06f32f2e2b1e0dc702 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_3500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:273af2867436e58e50844b183f496953a4322f2106174f65cdf8f4752710bfac +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_4000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_4000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92bc9b829c8b666cf302dcd246662137e5c18d00 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_4000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c504a4c9d3352c31e0583c783347cbea637d3ff9fcf44bcfacf13844cf2d5c +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_4500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_4500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8cd6f1e4ae0a3b06062046513a9dce7c9ee2d83 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_4500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04963cfbcdc9cd2dc389e7e28a770239911ce9b334576d08b2893eb1694ee9b5 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_25/step_500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..fca03244d525669c58b6ff8d26218be5b5ca372c --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05a8849df30e7f4627109595f9ff10dede74411779cb8d35331b0bb76d19677 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_25/step_500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b018ee8883c78dde983ae79043135a03746986b --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f360088c05bc92e8d3c4eabc91db63a3e2724e36c03e275f5cac29a042c09f0d +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_5000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_5000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7d0d34b569d20b9ea835ca1c14cb1554816892f --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_5000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f428af5b13e6cc0afa86eb5ce37f81fe889be900ce84bbd46e0e4e322327284e +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_25/step_5500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_25/step_5500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4e4e51a73058ebd3f12dc103ae37e9aab4e636d --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_5500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74449800989026130ff27bcae1274e725e5d5ca92814d200c2e706d9661a7107 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_25/step_5500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_25/step_5500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2398017c5e8a30f9cded81729bc7e74ad9fd0cdf --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_25/step_5500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f16f652cbae6a07047c1a4051ae50fb9a5e60bc186065030005485e8d0caf8 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_1000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_1000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..476ef12bcbe791f75aa68fe3e69217c31d91a35c --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_1000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33738442e1247e815fb2c500345319a5563d04000652fb2e24ec3b99631a06d7 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_1000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_1000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a0a7a5c42f191a85ba60738a5b43609aeae30b4 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_1000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3fe9a2492170a213e876e838e3694e7e11842a1ce16a69a3ed4abd97c25142 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_1500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_1500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a66a539b9e71e502c4d5c99fd64ba6078849789 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_1500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4fe2e83d79187c01397242bd31b08951ec9a17c2aa4835fbf217ae214dd6d30 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_1500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_1500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f85c490e436a1aa99e5355a87eeeee686d07495d --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_1500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4238b92fcab22412d76d75193cd25ef6ecce70cd3e73786b9a0a08e95b7827f4 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_2000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_2000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e8da511b7475fbb84bbd85290a97560c4b0d166 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_2000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e05a05fe400df853f1ef976108852b8cfe0295578167ed8d2a111866ccae22 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_2000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_2000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91cbaa38c4a54af914bdc55679538639b48ae649 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_2000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fddb33783b5d1b9cf4a8746b02ff240ae871abf2dce3330173431ad59361248 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_2500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_2500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0ba5b803677474ba27dbfe272ba563bc4f5ddba --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_2500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c0aab502ac28109143132ed98849852ecd804a375003ebbd18fe1e52923b32 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_2500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_2500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21a6266e152144fdc3a7f95ddb4e4035dfebedb6 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_2500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9971aa1129ada0584e6c8a881b75d64dc119a3651edb778fd0b1ec9e46ec7da +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_3000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_3000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..6291852ace231623bc3c30ede59f864d31f4cd83 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_3000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a8bada79a15e7fc2e3273cdc44976607af137b163ba7c4d6b2ffa68f21c7953 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_3000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_3000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bab4a716703faae3d62c8417d9be076453fed6d4 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_3000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327823a4df4fc17121bca57111c7eae599c1f1a8da3557fe6062b04ef0f15218 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_3500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_3500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..6625d5805aa70169cb18959ccda9aa36fbff3571 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_3500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab59da6ad5bfc19e5a30e46d734f6a96c6c39d0944edd864d2e2e9846d739865 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_3500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_3500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7984987cea7e02018c9b2217b21388fc395e7c3 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_3500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05631d45d62b040edcaa295a2ac561aa67530432eaec2821d182d533d66ceeee +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_4000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_4000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..577d7d591ec215e33cd4b18046040677eea51e58 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_4000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90bce4068ab86d26f73141badfd7c856f5c9b1d1ccf8087fc4a1e6eff1043f8e +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_4000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_4000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbcf80a64e753c8954820a039f5776f62ec89a53 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_4000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6fca8b465a580576bb9f3ac7fa32011667086d054c3183f59159d1f3349b2e +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_4500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_4500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c9f18f6cbc55c222e13b4a85813231330acbeef --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_4500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadbb614b7b2e1ae2a48dbd34e074dbab0461f72fd0d28db48104551f001abb8 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_4500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_4500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8816e7aaf7ae0d168d1ea99005b760c1ead5d673 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_4500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feac8841f6756b1981e6a780263c2680488263b65383c3b489cab58ee633cb9a +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl0_5/step_500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl0_5/step_500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..38e6d433b13d292716977a95ea768f6c9b565c75 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233b532b92f23ae96259c7f005df971212582b60258c503bea00d10dee942ad5 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl0_5/step_500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl0_5/step_500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cd62d747a5f82559cb70e2f828efd4a27bc5d96 --- /dev/null +++ b/early_exit_20250817_layers_5_kl0_5/step_500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cbf678583da0b9b3730d4d0cc24bd3d8fa2717f8bb7e850c073eee856ece348 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_1000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_1000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4b59c6e313f764b9662a7496b8632d4fd63782c --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_1000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d35f90cc59f64cb79f5adca290507cbe08e0633d63d38c295c99e6062c78f2af +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_1000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_1000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee00efc17dab61267ce7b792fb7c278b2c73bed4 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_1000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1546154ad4bd130bcfbcad7a9a31becf48dea2522da6ad21ee711c0e3ca8cacc +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_1500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_1500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a3640b9a8e6373a9e87628b9f4d8deb7dfa1c08 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_1500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d8a81f3799794e66bb9b4bcea4f0d4e8b397eef8f497bc18e34fcb35a64477e +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_1500/early_exiter/adapter_config.json b/early_exit_20250817_layers_5_kl1_0/step_1500/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3413e05cebb398a29a50b0bd9455aed509c56566 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_1500/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250817_layers_5_kl1_0/step_1500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_1500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78e4d5624fdeca34ecab91d80b6d8638e3192a70 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_1500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c07cad6c04a6b209c5d30f349968e875250093ebacc2f5064fa5abe35d2c47f +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_2000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_2000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c4f7c3226c4cc2d1c8060f0605e11faae631242 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_2000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab939b589ea04dccc46699e69076fbcb4e563be64155220671927656e11ef621 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_2000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_2000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1adeed3c45b4ac47390a62c4663d0b039a33f2d5 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_2000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8aba5d4d6e46b2e580e3d233607894b8f0064fcb1d6cf3565a747de5f1abb8a +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_2500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_2500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e091f15333ade706527e65ebdcd595e6404fc64 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_2500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d0c199a29a0bd6e6e3d36923f986dc9e50cea94ca49c5f760b5aa9afa8ed47 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_2500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_2500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5894328cdb4a7f9431d422e0666540220fdd18b --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_2500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90793ab06310160794c6cdaba44f64957b59ee903ca487c7e28ba9bfd3c2b970 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_3000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_3000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..640784c54b4925717a6d76d55c840d415760674c --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_3000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bc46a32522eeb22fc65dda72ec2ea23a26d524ee6312685b2cee5999431f42 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_3000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_3000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..581e5bf80eade659c95eb48083db124713f7d453 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_3000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75ffc50b1284f9b6ce5897a58dfa59448c51a4ca3b92fb3c68d50a7e4f9e2dc +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_3500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_3500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..175581cbf75b5635abbe745eb5c697d5de96459a --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_3500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e798c99ca4ef518c1ed0553c037dab3cccab3424a51d894cdb84d1b6c5d6773 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_3500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_3500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ed050126466728364c26665d923549670a8bf5d --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_3500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3bad102aabb761b5ef950fb7864d5ffb697f128e45d37e97d331b1502e0de78 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_4000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_4000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..df3b16e7d749636b529482f906b979e24ef6d3e9 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_4000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee0950af96d5b2e7519d64e713ac98af1b6df3714f39612874b686099f87c18 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_4000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_4000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d71868f177a08f82aae48e4f44b4315f00bcbf2 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_4000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38eae4c6bbce7689260103bf71cd539d4b887c11dd61243026feb7972fc2b1ec +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0/step_500/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0/step_500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..350178442409912b52191eab424221e4ec4d6c24 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:463833d32818b0f56ee21cb0057493ee25c4500b2f66051075ca79a27894df29 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0/step_500/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0/step_500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..926560a88ece4258068b21ebf6ce7c18b49f59b3 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0/step_500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68fa34fabf333552510d2cb3a2fee13a55f30ac4f9d0b5eaeefb9c946c3cb8a6 +size 1884453408 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..65e6b61ebbaf0011e0caf3de6184e82938f3f533 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f75ffbaea7390f197d644bc3608b02731e7820c4121fab094c3cefc863775b7 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exiter/adapter_config.json b/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9116cf5f47dbdbf5101cf322fa6ac870a51060d --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e763405024b1e877661c3b3acabcae6a01c3ca1 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_1000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb8ba4607b1d0c0a675661b898e2fd5e48f19f7155c00580ce8776df94e5fd4 +size 1875736896 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_2000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0_rank8/step_2000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..86a1c542a0f2d39e416b81c231bfa5ccbb5232b9 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_2000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f9fc755411bced0c877579f73136071e8ce8bd03670fba0708ac730e96ff13 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_2000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0_rank8/step_2000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9395dc13fb58907ccae96763adb57110a4709cda --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_2000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80cd4fed507cb81615ac9da7b696453c15b28ba3e888369ba09c82e92342c4e7 +size 1875736896 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_3000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0_rank8/step_3000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc0db63de813c9db9803061032da67ecbd1960b4 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_3000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a93b6974addada5137c319602ee5e19c6fcb632c70ea3f1014bd553ef55a03 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_3000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0_rank8/step_3000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19748f0ef53be725357d4e6fa493744ea81d98a7 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_3000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da1ec24895e6ab0685adfb2daefa6f13d130b2d7e53243eab1a34ec995ea44c +size 1875736896 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_4000/early_exit_probes.pt b/early_exit_20250817_layers_5_kl1_0_rank8/step_4000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3bd40da0bcc7bc0bef629b3a96dc5145dffd3b5 --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_4000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feaacaccecf75cf2388a19129564f85d62baa723619722280736bf1dbc977546 +size 94502251 diff --git a/early_exit_20250817_layers_5_kl1_0_rank8/step_4000/early_exiter/adapter_model.safetensors b/early_exit_20250817_layers_5_kl1_0_rank8/step_4000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..236e7cb176f8f17171a4a4f3ff4eefa34030e7be --- /dev/null +++ b/early_exit_20250817_layers_5_kl1_0_rank8/step_4000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d00c0bc5282ceba103f972cca6057631a98b6c57388a837e6a989d3aafca5de3 +size 1875736896 diff --git a/early_exit_20250818_layers_5_kl1_0/README.md b/early_exit_20250818_layers_5_kl1_0/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb0893c2a4d79d9bc98c4f39e5b899f11c962dfb --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/README.md @@ -0,0 +1,207 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.0 \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..0390ae791c0e859504cb57e5089062c206f0d9bc --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c848725b820a88040d8ef47836383b83baa108da0c1d7b2b6177abc65c3498 +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/early_exiter/adapter_config.json b/early_exit_20250818_layers_5_kl1_0/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3413e05cebb398a29a50b0bd9455aed509c56566 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a67de84416e5e710a0ccfb2d0511e61b2a052e1 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d8f542a9a96dbf7983fa134ad535f7eee02d8162be4b8b450e4d576250519a +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_100/README.md b/early_exit_20250818_layers_5_kl1_0/step_100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb0893c2a4d79d9bc98c4f39e5b899f11c962dfb --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_100/README.md @@ -0,0 +1,207 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.0 \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_100/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_100/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..16b802634ab7719a0fa2b62c49793d38d435054c --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_100/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac52f57797a315cc1515a72a4498ad0d4ce1a48394d3c2fd0f485755f2f84acd +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_100/early_exiter/adapter_config.json b/early_exit_20250818_layers_5_kl1_0/step_100/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b14a1f397b567057c5b3a21b52a67eb75ac69d8c --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_100/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_100/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_100/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc43f0b33e77c165a5040328f2fa5bfe90bfa5b3 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_100/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f180b0118e8c2385b62813be5a92dca2a6aebff8c6dbd2010d7c77a6a66ab508 +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_100/metadata.json b/early_exit_20250818_layers_5_kl1_0/step_100/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4453fca5915cf996beb71cc25dfa4e16aa14609c --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_100/metadata.json @@ -0,0 +1,13 @@ +{ + "base_model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "exitable_layer_idxs": [ + 5.0, + 10.0, + 15.0, + 20.0, + 25.0, + Infinity + ], + "total_exitable_layers": 5, + "has_early_exit_probes": true +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_2000/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_2000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..206dbe58e22e459300df83583737ac4587ad0ca0 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_2000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a75853c38d136f7ae8e88f641a14828d3fed6723d633f8030a17e7fd70ea8d2 +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_2000/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_2000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..317bde6ef8f893c35043d6d3a63010deb60bee48 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_2000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b9303ba75f33d7f282e1bcf9e7ae9916047aa49b9061211ec7f0551548ab78 +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_2500/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_2500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6a8de4bd4dbce3bf2ee12d9173df280ab1d81e1 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_2500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0399d565a5f0548174a6a806b4a4ef426b33cd61558f7f4f623379a073839ffb +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_2500/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_2500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1da829a95cf47ed57d728fcaa4dc38a3edde677d --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_2500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db7ed96a849eb3ffa848e9a1ecbc9dccef938a54fdf1155178308f3423989e0 +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_3000/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_3000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b74263149630f9dda242f9b9c4d4f65fd7880204 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_3000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6481afcf339f72aece3854daa682f37686530f46e0b8b6a812780a8e1e167856 +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_3500/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_3500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc8bf0a050dba8b0c2504a38d8c041a945229c02 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_3500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886f70693f31f3e37ca12dc3486844e4b2756a2494bdbb743da80420feafb821 +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_3500/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_3500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4b73310f145ab5e806187e6187a64030933ee8b --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_3500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7301bbe4c5d7ad9f5ee5f9f55dc666023d389c2ab9e55667277235d0bce7b46 +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_4000/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_4000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..91b089007c25ee74768717ebba5f9bff7b92e67a --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_4000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726a23350376409b896f1704feca0b171cad9f5895cf291cf77c23f6459b2826 +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_4000/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_4000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cb0f924ebb1cc34311b7d535277dc9029bf4c85 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_4000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b571779ae4ac51248bed62bb332cd8d924e07db07533c69360c867774b4594f5 +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_4500/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_4500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..f535a03a5aa2b0e8f859a6a64a80e40a2821dfc3 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_4500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e7301175872fe838f85773bba225356ab479f3e485bd83450840b4b20ae87a +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_4500/early_exiter/adapter_config.json b/early_exit_20250818_layers_5_kl1_0/step_4500/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3413e05cebb398a29a50b0bd9455aed509c56566 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_4500/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_4500/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_4500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fd495b2fbb34b4f5c1304664927488b57a3ee68 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_4500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9108b69dad0c1683f7b68bdfc91a8f65496b4958ce9f18431fdb4497e1e4085a +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_50/README.md b/early_exit_20250818_layers_5_kl1_0/step_50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb0893c2a4d79d9bc98c4f39e5b899f11c962dfb --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_50/README.md @@ -0,0 +1,207 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.0 \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_50/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_50/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..c59ff3c80db7a4ec66086acc70aaedeb2fa851a4 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_50/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a84cc01e7821644b7c279f2b78ba076ede0f8df05803ea3dddd8c8df9641f89 +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_50/early_exiter/adapter_config.json b/early_exit_20250818_layers_5_kl1_0/step_50/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b14a1f397b567057c5b3a21b52a67eb75ac69d8c --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_50/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_50/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_50/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..598f5e65eb1da5fc6cd0af38cbd52bb08debe95b --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_50/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a60b692b1258ae8e7ccaa556c713c22b638726a3722f8d2a3d809ff3b1425065 +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_50/metadata.json b/early_exit_20250818_layers_5_kl1_0/step_50/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4453fca5915cf996beb71cc25dfa4e16aa14609c --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_50/metadata.json @@ -0,0 +1,13 @@ +{ + "base_model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "exitable_layer_idxs": [ + 5.0, + 10.0, + 15.0, + 20.0, + 25.0, + Infinity + ], + "total_exitable_layers": 5, + "has_early_exit_probes": true +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_5000/README.md b/early_exit_20250818_layers_5_kl1_0/step_5000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb0893c2a4d79d9bc98c4f39e5b899f11c962dfb --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5000/README.md @@ -0,0 +1,207 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.0 \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_5000/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_5000/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..612add248162e86063d2e7b920ad27a8813f7680 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5000/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b724c1ace2c8f6df3b93f06f111b8042e60797132b26714b295cc3bfed0474f +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_5000/early_exiter/adapter_config.json b/early_exit_20250818_layers_5_kl1_0/step_5000/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3413e05cebb398a29a50b0bd9455aed509c56566 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5000/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_5000/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_5000/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b65a46f80c945b5f16c4509be6fe73e213d427e --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5000/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b25ed2444ded1e2928182a8dc37eb64407ddd1c357624dbe6cae866d518f9d +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_5000/metadata.json b/early_exit_20250818_layers_5_kl1_0/step_5000/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4453fca5915cf996beb71cc25dfa4e16aa14609c --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5000/metadata.json @@ -0,0 +1,13 @@ +{ + "base_model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "exitable_layer_idxs": [ + 5.0, + 10.0, + 15.0, + 20.0, + 25.0, + Infinity + ], + "total_exitable_layers": 5, + "has_early_exit_probes": true +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_5500/README.md b/early_exit_20250818_layers_5_kl1_0/step_5500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb0893c2a4d79d9bc98c4f39e5b899f11c962dfb --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5500/README.md @@ -0,0 +1,207 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.0 \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_5500/early_exit_probes.pt b/early_exit_20250818_layers_5_kl1_0/step_5500/early_exit_probes.pt new file mode 100644 index 0000000000000000000000000000000000000000..0302d093d4e18857c5066537919017bef2a233e1 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5500/early_exit_probes.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1efe2e370137c36b1682f9d980f7a9fb51dc9ab0fb8c4901ecf8393a059df236 +size 94502251 diff --git a/early_exit_20250818_layers_5_kl1_0/step_5500/early_exiter/adapter_config.json b/early_exit_20250818_layers_5_kl1_0/step_5500/early_exiter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3413e05cebb398a29a50b0bd9455aed509c56566 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5500/early_exiter/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "v_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/early_exit_20250818_layers_5_kl1_0/step_5500/early_exiter/adapter_model.safetensors b/early_exit_20250818_layers_5_kl1_0/step_5500/early_exiter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1158d7b89cbb05e713b5dacc2db3fcdaba2f3c57 --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5500/early_exiter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da9f9bc4015e80389197e8371902edbb35969bfbba3da100bca36c202ffe01c +size 1884453408 diff --git a/early_exit_20250818_layers_5_kl1_0/step_5500/metadata.json b/early_exit_20250818_layers_5_kl1_0/step_5500/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4453fca5915cf996beb71cc25dfa4e16aa14609c --- /dev/null +++ b/early_exit_20250818_layers_5_kl1_0/step_5500/metadata.json @@ -0,0 +1,13 @@ +{ + "base_model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "exitable_layer_idxs": [ + 5.0, + 10.0, + 15.0, + 20.0, + 25.0, + Infinity + ], + "total_exitable_layers": 5, + "has_early_exit_probes": true +} \ No newline at end of file