Upload folder using huggingface_hub
Browse files- config.yaml +41 -0
- metrics.json +174 -0
- model_1000.pt +3 -0
- model_2000.pt +3 -0
- model_3000.pt +3 -0
- model_4000.pt +3 -0
- model_5000.pt +3 -0
- model_6000.pt +3 -0
- model_7000.pt +3 -0
- state.pt +3 -0
config.yaml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
config: glm_config.yaml
|
| 2 |
+
model:
|
| 3 |
+
family: gpt2
|
| 4 |
+
n_dims: 10
|
| 5 |
+
n_embd: 256
|
| 6 |
+
n_head: 8
|
| 7 |
+
n_layer: 12
|
| 8 |
+
n_positions: 101
|
| 9 |
+
out_dir: /home/derixu/Documents/in-context-learning-GLM/glm_weights/f1d29486-1a8f-41ba-bc70-0393f3af1d5b
|
| 10 |
+
test_run: false
|
| 11 |
+
training:
|
| 12 |
+
batch_size: 64
|
| 13 |
+
curriculum:
|
| 14 |
+
dims:
|
| 15 |
+
end: 10
|
| 16 |
+
inc: 0
|
| 17 |
+
interval: 10000
|
| 18 |
+
start: 10
|
| 19 |
+
points:
|
| 20 |
+
end: 40
|
| 21 |
+
inc: 0
|
| 22 |
+
interval: 10000
|
| 23 |
+
start: 40
|
| 24 |
+
data: gaussian
|
| 25 |
+
keep_every_steps: 1000
|
| 26 |
+
learning_rate: 0.00025
|
| 27 |
+
num_tasks: null
|
| 28 |
+
num_training_examples: null
|
| 29 |
+
resume_id: null
|
| 30 |
+
save_every_steps: 1000
|
| 31 |
+
task: GLM
|
| 32 |
+
task_kwargs:
|
| 33 |
+
function_type: linear
|
| 34 |
+
scaling: 0.32
|
| 35 |
+
train_steps: 8000
|
| 36 |
+
wandb:
|
| 37 |
+
entity: derryxu
|
| 38 |
+
log_every_steps: 100
|
| 39 |
+
name: null
|
| 40 |
+
notes: ICL GLM training
|
| 41 |
+
project: in-context-training
|
metrics.json
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"standard": {
|
| 3 |
+
"gpt2_embd=256_layer=12_head=8": {
|
| 4 |
+
"mean": [
|
| 5 |
+
435.66064453125,
|
| 6 |
+
354.0453796386719,
|
| 7 |
+
434.26837158203125,
|
| 8 |
+
383.37506103515625,
|
| 9 |
+
399.5060119628906,
|
| 10 |
+
402.784912109375,
|
| 11 |
+
438.0303649902344,
|
| 12 |
+
409.4374084472656,
|
| 13 |
+
370.25018310546875,
|
| 14 |
+
367.0221862792969,
|
| 15 |
+
409.5334167480469,
|
| 16 |
+
340.23321533203125,
|
| 17 |
+
385.4977722167969,
|
| 18 |
+
357.9358825683594,
|
| 19 |
+
415.44232177734375,
|
| 20 |
+
420.10992431640625,
|
| 21 |
+
426.7008361816406,
|
| 22 |
+
368.7876892089844,
|
| 23 |
+
383.91064453125,
|
| 24 |
+
382.27978515625,
|
| 25 |
+
368.6982421875,
|
| 26 |
+
390.12310791015625,
|
| 27 |
+
388.86309814453125,
|
| 28 |
+
362.7231140136719,
|
| 29 |
+
364.34234619140625,
|
| 30 |
+
404.6338806152344,
|
| 31 |
+
389.669677734375,
|
| 32 |
+
370.7619323730469,
|
| 33 |
+
352.1150817871094,
|
| 34 |
+
393.02459716796875,
|
| 35 |
+
420.06787109375,
|
| 36 |
+
370.9207763671875,
|
| 37 |
+
374.46246337890625,
|
| 38 |
+
406.9833984375,
|
| 39 |
+
386.9122314453125,
|
| 40 |
+
369.3869934082031,
|
| 41 |
+
422.01580810546875,
|
| 42 |
+
378.33880615234375,
|
| 43 |
+
340.59991455078125,
|
| 44 |
+
351.04736328125
|
| 45 |
+
],
|
| 46 |
+
"std": [
|
| 47 |
+
1023.4417114257812,
|
| 48 |
+
928.055419921875,
|
| 49 |
+
1012.333984375,
|
| 50 |
+
954.5304565429688,
|
| 51 |
+
956.4081420898438,
|
| 52 |
+
986.5963745117188,
|
| 53 |
+
1018.4349365234375,
|
| 54 |
+
972.6322631835938,
|
| 55 |
+
941.1144409179688,
|
| 56 |
+
906.0840454101562,
|
| 57 |
+
978.5390625,
|
| 58 |
+
882.5136108398438,
|
| 59 |
+
943.0443115234375,
|
| 60 |
+
905.9511108398438,
|
| 61 |
+
1004.9692993164062,
|
| 62 |
+
969.0228271484375,
|
| 63 |
+
974.656494140625,
|
| 64 |
+
936.169189453125,
|
| 65 |
+
940.4185791015625,
|
| 66 |
+
957.3109130859375,
|
| 67 |
+
915.7345581054688,
|
| 68 |
+
942.5462036132812,
|
| 69 |
+
957.1474609375,
|
| 70 |
+
915.1956787109375,
|
| 71 |
+
922.2449951171875,
|
| 72 |
+
950.1206665039062,
|
| 73 |
+
953.1669921875,
|
| 74 |
+
931.5992431640625,
|
| 75 |
+
903.25244140625,
|
| 76 |
+
949.4171142578125,
|
| 77 |
+
996.4799194335938,
|
| 78 |
+
940.2189331054688,
|
| 79 |
+
946.6638793945312,
|
| 80 |
+
970.4464111328125,
|
| 81 |
+
941.7376098632812,
|
| 82 |
+
931.8547973632812,
|
| 83 |
+
991.169189453125,
|
| 84 |
+
956.2705688476562,
|
| 85 |
+
881.926513671875,
|
| 86 |
+
917.1022338867188
|
| 87 |
+
],
|
| 88 |
+
"bootstrap_low": [
|
| 89 |
+
389.000732421875,
|
| 90 |
+
314.3379821777344,
|
| 91 |
+
386.958251953125,
|
| 92 |
+
339.7007141113281,
|
| 93 |
+
357.15814208984375,
|
| 94 |
+
358.50433349609375,
|
| 95 |
+
388.43121337890625,
|
| 96 |
+
363.8498840332031,
|
| 97 |
+
326.8326110839844,
|
| 98 |
+
324.1805725097656,
|
| 99 |
+
364.93634033203125,
|
| 100 |
+
300.06256103515625,
|
| 101 |
+
338.8838806152344,
|
| 102 |
+
318.0883483886719,
|
| 103 |
+
368.88616943359375,
|
| 104 |
+
379.0393981933594,
|
| 105 |
+
381.4306945800781,
|
| 106 |
+
324.41387939453125,
|
| 107 |
+
341.67681884765625,
|
| 108 |
+
341.8121643066406,
|
| 109 |
+
329.7847900390625,
|
| 110 |
+
345.211181640625,
|
| 111 |
+
347.4241027832031,
|
| 112 |
+
323.92401123046875,
|
| 113 |
+
324.4379577636719,
|
| 114 |
+
359.37689208984375,
|
| 115 |
+
348.2349548339844,
|
| 116 |
+
330.6774597167969,
|
| 117 |
+
313.04620361328125,
|
| 118 |
+
347.4327392578125,
|
| 119 |
+
374.65093994140625,
|
| 120 |
+
330.4932556152344,
|
| 121 |
+
329.59893798828125,
|
| 122 |
+
367.0416259765625,
|
| 123 |
+
345.93328857421875,
|
| 124 |
+
327.23577880859375,
|
| 125 |
+
379.28387451171875,
|
| 126 |
+
339.1770935058594,
|
| 127 |
+
302.7874755859375,
|
| 128 |
+
308.92901611328125
|
| 129 |
+
],
|
| 130 |
+
"bootstrap_high": [
|
| 131 |
+
484.8331604003906,
|
| 132 |
+
396.8553466796875,
|
| 133 |
+
481.001708984375,
|
| 134 |
+
428.6622619628906,
|
| 135 |
+
445.95166015625,
|
| 136 |
+
445.2425842285156,
|
| 137 |
+
487.20489501953125,
|
| 138 |
+
454.6344299316406,
|
| 139 |
+
414.505126953125,
|
| 140 |
+
407.9673767089844,
|
| 141 |
+
454.6570739746094,
|
| 142 |
+
378.86480712890625,
|
| 143 |
+
429.1602478027344,
|
| 144 |
+
398.9708557128906,
|
| 145 |
+
460.9373474121094,
|
| 146 |
+
463.269775390625,
|
| 147 |
+
475.2002868652344,
|
| 148 |
+
412.81597900390625,
|
| 149 |
+
432.5765686035156,
|
| 150 |
+
426.57501220703125,
|
| 151 |
+
411.27142333984375,
|
| 152 |
+
432.06439208984375,
|
| 153 |
+
431.69866943359375,
|
| 154 |
+
405.1575622558594,
|
| 155 |
+
407.22625732421875,
|
| 156 |
+
450.1597595214844,
|
| 157 |
+
431.63677978515625,
|
| 158 |
+
415.54998779296875,
|
| 159 |
+
392.48016357421875,
|
| 160 |
+
435.62127685546875,
|
| 161 |
+
462.593017578125,
|
| 162 |
+
413.58929443359375,
|
| 163 |
+
421.81707763671875,
|
| 164 |
+
454.3692321777344,
|
| 165 |
+
431.091552734375,
|
| 166 |
+
413.1641540527344,
|
| 167 |
+
469.64031982421875,
|
| 168 |
+
423.5462341308594,
|
| 169 |
+
385.13836669921875,
|
| 170 |
+
394.6562805175781
|
| 171 |
+
]
|
| 172 |
+
}
|
| 173 |
+
}
|
| 174 |
+
}
|
model_1000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2da507e2039e51129661d6bd34a89899b186d3b270dddb1327f51ef03c011ba2
|
| 3 |
+
size 90145655
|
model_2000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcc33a29ee491e97d608e83cfdd6964f985b0a4f3121ddb9c597fd5eac60e3b4
|
| 3 |
+
size 90145655
|
model_3000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27508086033adb1175272e633befe3718f6856622293287d1ce8840530d875fa
|
| 3 |
+
size 90145655
|
model_4000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d93f792d4831cd9cfc1df2b72cdec235ec8a64a2da907fb6d541b3e2a9eb1a34
|
| 3 |
+
size 90145655
|
model_5000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:584b818a0cfa2f869fc6583ab129ccf853808c20a4b18672a151df5c6f3be930
|
| 3 |
+
size 90145655
|
model_6000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37bb0b79a1fa7bb03743ca08f6af7c87d403f85ed1a49812639c5e4c513e6e82
|
| 3 |
+
size 90145655
|
model_7000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd82c89654e7ec8a6c505f1d9999989cdc0bbb854741570ccc04b3a25fbc3251
|
| 3 |
+
size 90145655
|
state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35993bd158b81a9ed2d7e71bf6f8f0ce69435b3d653ed3986abd64f2fb320aee
|
| 3 |
+
size 166501481
|