Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260980.gamma.1523167.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/bias_shift/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260966.gamma.1522817.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260971.gamma.1522931.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260975.gamma.1523035.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260999.gamma.1523628.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/bias_shift/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260984.gamma.1523284.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260989.gamma.1523398.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260994.gamma.1523512.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644236.gamma.1800848.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/bias_shift/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644217.gamma.1800451.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644223.gamma.1800594.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644230.gamma.1800724.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=6/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761524.gamma.2125092.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/bias_shift/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761507.gamma.2124682.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761513.gamma.2124805.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=8/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761563.gamma.2125740.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=8/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761557.gamma.2125617.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=256/seed=8/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735259715.gamma.1499820.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/bias_shift/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735259706.gamma.1499604.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735259710.gamma.1499703.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737643975.gamma.1796239.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/bias_shift/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737643962.gamma.1795877.0 +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt +3 -0
- finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/config.json +39 -0
- finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/generation_config.json +12 -0
- finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/model_config.yaml +40 -0
- finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/tokenizer.json +0 -0
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260980.gamma.1523167.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a11fb7104b75c85b5431b7875fc49e4a59f4848665bb1be53be74e0ef510596b
|
| 3 |
+
size 51106
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/bias_shift/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33350fb04b883f5c2e605fa40caad0c69166193662b94791be1d52dbdc56d8ab
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260966.gamma.1522817.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:451b07c195c84028a3ef915bac24be9da1592bfe48fb26513084e635d2d33f5d
|
| 3 |
+
size 126800
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0f0676e8899d022b17a2878e205de788397bbbc9f83fbb61a62d455f020dd5c
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260971.gamma.1522931.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df747abe41b39cdb656769703f196d134cdf555cccaad9255f81a72b242528cf
|
| 3 |
+
size 115576
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d745ea54ade5f10ce40455404502f61aedce33007df321218e1d1f65926de0e8
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260975.gamma.1523035.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc592ec7f82f8d2829840aa38faed7af425f329c45fb5c78ee5247611fde3161
|
| 3 |
+
size 156628
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=0/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:088e8368e51a640a9b8e7d27e39a268f749bd7817a287c207c2862915029c9d3
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260999.gamma.1523628.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa69b7f1909783c906ab1f4bef27527e996c86621edc2297a0a34c60ddfd3697
|
| 3 |
+
size 4254
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/bias_shift/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02bf0862d920cabd124e981dfe8ac7f782a7165b55ff47550a1a01f8594e0f43
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260984.gamma.1523284.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c4c7b25ffdad62a86fabaddd329ac1fa6081d55584c450db9a36cabb546ce4d
|
| 3 |
+
size 151272
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59cf94d779a2a54d73d210299382f099e0f4d6ca2347c92c8eaa6ae778c2ee5b
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260989.gamma.1523398.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77da7adfdcc5d196abefa63a3c8ba6ada4511a248f563c9a713b55e447aa7027
|
| 3 |
+
size 151552
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37262c0c290d357df40aab45d6bc623a2691cc417c55c1bdc59b7d1ab2a156cb
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735260994.gamma.1523512.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a9669726d4c3a1c4bb037aa5de7678d99e89d5fec9f8296013e0d9a38674547
|
| 3 |
+
size 150700
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=1/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7bbab292910974229d459bd29c1f81b2ecc123c2f976c7bf37ea4018ead5364
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644236.gamma.1800848.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee16b01fb92f364c433a1663c4050694e780b53f6b30466a4fd636659fcaea73
|
| 3 |
+
size 4810
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/bias_shift/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3be1cb0497d8a6ca50fa9710c641712fe9a1aeeb6771be9e8b3dc0c046c09d1d
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644217.gamma.1800451.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89dc43e72c3741e291e684f60ec03e0e0217261d44434503ceff6dbbfc51647b
|
| 3 |
+
size 211568
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a3fe3b7cb533e69123b5454bb00ebff0cc464c98e1847f9195faad05bc49e76
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644223.gamma.1800594.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acac7d2acfb17a08963a69bf8f1394c5672cfb1748cba6cb545577e0fa9cee91
|
| 3 |
+
size 210140
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39b7c8f4e7cadb2506e6df5339c40bbf9fef7e7c9e78861dbbb66194b9a68ed8
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737644230.gamma.1800724.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:643c8d627ac3a28fbe4b1bf5d1060671b9ab9bde72e01d0a1113b59fd1ffa1a7
|
| 3 |
+
size 229896
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=5/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1185495bba24a5fcbd422436e7d663aaebc4605cc5964621998835dbca5a5237
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=6/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38de5541c2199d180ecb3c88cc8d42517db071f22c4df8b17721ebfffeb561d4
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761524.gamma.2125092.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7e6404a1b7fe42beea8e9b5642e3628d69dac2235aa49ccc2fec76c8237d895
|
| 3 |
+
size 103876
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/bias_shift/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12e4ff259a8977ce2ddf7d12e76319e182b388e92872751d59df4e85ca71a991
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761507.gamma.2124682.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acc92eef9eb9818c745260eea073386f017858bb64801eb5662785a180a1e817
|
| 3 |
+
size 178356
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6fa5253a707e2df8bb7b84accbb8328ae3f148926404c407c69ca4ca392a33b
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761513.gamma.2124805.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ff912a62a6c30de7e72551b38b27a2c9e4e7a7b4067ee08904f228377e34abc
|
| 3 |
+
size 168288
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6b7e7c3b2fcedcad856d0d14c8adbd01f6f4fc0f05d2584881dcb19d5c58fe0
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=7/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9109b8d6dc8b0cd183e5b81aa7956fedf160ed64befd57a27fcb4d4b066ee56f
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=8/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761563.gamma.2125740.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ade3010f77121eed78ad6c57fc5ee934197ef7b5649ef0adfcb2e93685c0ab12
|
| 3 |
+
size 4254
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=8/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737761557.gamma.2125617.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9a1100574d07898b186a33db1fd34beda1e69732d40752dd5b873e83d87d096
|
| 3 |
+
size 216908
|
calibration/llama3.2-1b-instruct/sst2/size=256/seed=8/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0ff35ba111fdbda055d1c4b76da3b2f7edc25ea5ff2985ede33c63614d06ca4
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735259715.gamma.1499820.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb238b14228c3cdee3abc3e2b96aa83578414801c265bad1026d83768c7101a9
|
| 3 |
+
size 111462
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/bias_shift/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d797ef2408b379176a972b23411f7c1f3fc48d352d524729e8df6106d4854d99
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/temp_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735259706.gamma.1499604.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a0ac3eb82a4f609844b91f702c83cbc7c08e8ff5fb02aa960422b164b82ea8e
|
| 3 |
+
size 119132
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c04500edce963bcd2e7229c6be42b0cb7a1208841deeb8299a806883555aaa57
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/vector_scaling/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1735259710.gamma.1499703.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed537ee2052955e8b0db69bd4744868110e6ddc274281c3478911e61f5e6064d
|
| 3 |
+
size 192254
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=4/vector_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbf625af3fec8b1cce3a6b2817c1864d19188fc5f5379a781e82b42c22615fe9
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/bias_shift/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737643975.gamma.1796239.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:864bfdabe876235c32e34890ece0d39c5e2978fced6cbde0ea8dc2f49454434e
|
| 3 |
+
size 157066
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/bias_shift/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:696a1809b3a9e6aacda879f183e9355f50db6364e0c9be0ef0730f89aa630113
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/dp_calibration/0.0-1.0/0.0-1.0/logs/events.out.tfevents.1737643962.gamma.1795877.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9ff9fcccb01174c87890e656a7f9042edcfe4b4843f75011dc1b54833a1c5b1
|
| 3 |
+
size 181914
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/dp_calibration/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:884ec56b3d33c28b4721530af55b2118a70c0529763721281b1475bafed2aa67
|
| 3 |
+
size 1740
|
calibration/llama3.2-1b-instruct/sst2/size=64/seed=6/temp_scaling/0.0-1.0/0.0-1.0/state.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8b4007d6a6c3f0ad04f2e78ecf4fa38b21742833531e3cc9190a6719ca94cd9
|
| 3 |
+
size 1740
|
finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"LlamaForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 128000,
|
| 8 |
+
"eos_token_id": [
|
| 9 |
+
128001,
|
| 10 |
+
128008,
|
| 11 |
+
128009
|
| 12 |
+
],
|
| 13 |
+
"head_dim": 64,
|
| 14 |
+
"hidden_act": "silu",
|
| 15 |
+
"hidden_size": 2048,
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 8192,
|
| 18 |
+
"max_position_embeddings": 131072,
|
| 19 |
+
"mlp_bias": false,
|
| 20 |
+
"model_type": "llama",
|
| 21 |
+
"num_attention_heads": 32,
|
| 22 |
+
"num_hidden_layers": 16,
|
| 23 |
+
"num_key_value_heads": 8,
|
| 24 |
+
"pretraining_tp": 1,
|
| 25 |
+
"rms_norm_eps": 1e-05,
|
| 26 |
+
"rope_scaling": {
|
| 27 |
+
"factor": 32.0,
|
| 28 |
+
"high_freq_factor": 4.0,
|
| 29 |
+
"low_freq_factor": 1.0,
|
| 30 |
+
"original_max_position_embeddings": 8192,
|
| 31 |
+
"rope_type": "llama3"
|
| 32 |
+
},
|
| 33 |
+
"rope_theta": 500000.0,
|
| 34 |
+
"tie_word_embeddings": true,
|
| 35 |
+
"torch_dtype": "bfloat16",
|
| 36 |
+
"transformers_version": "4.45.0.dev0",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 128256
|
| 39 |
+
}
|
finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/generation_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 128000,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
128001,
|
| 6 |
+
128008,
|
| 7 |
+
128009
|
| 8 |
+
],
|
| 9 |
+
"temperature": 0.6,
|
| 10 |
+
"top_p": 0.9,
|
| 11 |
+
"transformers_version": "4.45.0.dev0"
|
| 12 |
+
}
|
finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/model_config.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
attention_logit_softcapping: null
|
| 2 |
+
attention_scores_scalar: null
|
| 3 |
+
bias: false
|
| 4 |
+
block_size: 131072
|
| 5 |
+
final_logit_softcapping: null
|
| 6 |
+
gelu_approximate: none
|
| 7 |
+
head_size: 64
|
| 8 |
+
hf_config:
|
| 9 |
+
name: Llama-3.2-1B-Instruct
|
| 10 |
+
org: meta-llama
|
| 11 |
+
intermediate_size: 8192
|
| 12 |
+
lm_head_bias: false
|
| 13 |
+
mlp_class_name: LLaMAMLP
|
| 14 |
+
n_embd: 2048
|
| 15 |
+
n_expert: 0
|
| 16 |
+
n_expert_per_token: 0
|
| 17 |
+
n_head: 32
|
| 18 |
+
n_layer: 16
|
| 19 |
+
n_query_groups: 8
|
| 20 |
+
name: Llama-3.2-1B-Instruct
|
| 21 |
+
norm_class_name: RMSNorm
|
| 22 |
+
norm_eps: 1.0e-05
|
| 23 |
+
padded_vocab_size: 128256
|
| 24 |
+
padding_multiple: 512
|
| 25 |
+
parallel_residual: false
|
| 26 |
+
post_attention_norm: false
|
| 27 |
+
post_mlp_norm: false
|
| 28 |
+
rope_adjustments:
|
| 29 |
+
factor: 32.0
|
| 30 |
+
high_freq_factor: 4.0
|
| 31 |
+
low_freq_factor: 1.0
|
| 32 |
+
original_max_seq_len: 8192
|
| 33 |
+
rope_base: 500000
|
| 34 |
+
rope_condense_ratio: 1
|
| 35 |
+
rotary_percentage: 1.0
|
| 36 |
+
scale_embeddings: false
|
| 37 |
+
shared_attention_norm: false
|
| 38 |
+
sliding_window_layer_placing: null
|
| 39 |
+
sliding_window_size: null
|
| 40 |
+
vocab_size: 128000
|
finetune_lora/llama3.2-1b-instruct/agnews/size=8/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|