Upload folder using huggingface_hub
Browse files- config.yaml +43 -0
- metrics.json +174 -0
- model_1000.pt +3 -0
- model_2000.pt +3 -0
- model_3000.pt +3 -0
- model_4000.pt +3 -0
- model_5000.pt +3 -0
- model_6000.pt +3 -0
- model_7000.pt +3 -0
- model_8000.pt +3 -0
- model_9000.pt +3 -0
- state.pt +3 -0
config.yaml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
config: /root/in-context-learning-GLM/src/glm_configs/joint_training.yaml
|
| 2 |
+
model:
|
| 3 |
+
family: gpt2
|
| 4 |
+
n_dims: 10
|
| 5 |
+
n_embd: 256
|
| 6 |
+
n_head: 8
|
| 7 |
+
n_layer: 12
|
| 8 |
+
n_positions: 40
|
| 9 |
+
out_dir: gs://glm_weights/75172107-78d8-48f6-907d-3b043d14e194
|
| 10 |
+
test_run: false
|
| 11 |
+
training:
|
| 12 |
+
batch_size: 256
|
| 13 |
+
curriculum:
|
| 14 |
+
dims:
|
| 15 |
+
end: 10
|
| 16 |
+
inc: 0
|
| 17 |
+
interval: 2000
|
| 18 |
+
start: 10
|
| 19 |
+
points:
|
| 20 |
+
end: 40
|
| 21 |
+
inc: 0
|
| 22 |
+
interval: 2000
|
| 23 |
+
start: 40
|
| 24 |
+
data: gaussian
|
| 25 |
+
keep_every_steps: 1000
|
| 26 |
+
learning_rate: 0.0002
|
| 27 |
+
num_tasks: null
|
| 28 |
+
num_training_examples: null
|
| 29 |
+
resume_id: null
|
| 30 |
+
save_every_steps: 1000
|
| 31 |
+
task: GLM
|
| 32 |
+
task_kwargs:
|
| 33 |
+
function_type:
|
| 34 |
+
- neg_binomial
|
| 35 |
+
- poisson
|
| 36 |
+
scaling: 0.32
|
| 37 |
+
train_steps: 10000
|
| 38 |
+
wandb:
|
| 39 |
+
entity: in-context
|
| 40 |
+
log_every_steps: 10
|
| 41 |
+
name: null
|
| 42 |
+
notes: ''
|
| 43 |
+
project: in-context-training
|
metrics.json
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"standard": {
|
| 3 |
+
"gpt2_embd=256_layer=12_head=8": {
|
| 4 |
+
"mean": [
|
| 5 |
+
398.8111877441406,
|
| 6 |
+
424.48876953125,
|
| 7 |
+
427.48944091796875,
|
| 8 |
+
404.28692626953125,
|
| 9 |
+
364.2803649902344,
|
| 10 |
+
374.157470703125,
|
| 11 |
+
415.23126220703125,
|
| 12 |
+
401.9036865234375,
|
| 13 |
+
387.84368896484375,
|
| 14 |
+
420.93109130859375,
|
| 15 |
+
413.0240783691406,
|
| 16 |
+
428.47607421875,
|
| 17 |
+
457.60821533203125,
|
| 18 |
+
402.51763916015625,
|
| 19 |
+
378.2535095214844,
|
| 20 |
+
411.93109130859375,
|
| 21 |
+
417.1578063964844,
|
| 22 |
+
452.75286865234375,
|
| 23 |
+
395.9964294433594,
|
| 24 |
+
435.2483825683594,
|
| 25 |
+
375.732421875,
|
| 26 |
+
388.1903076171875,
|
| 27 |
+
330.987548828125,
|
| 28 |
+
392.2677307128906,
|
| 29 |
+
397.4671325683594,
|
| 30 |
+
360.8671569824219,
|
| 31 |
+
439.656494140625,
|
| 32 |
+
443.99395751953125,
|
| 33 |
+
446.53936767578125,
|
| 34 |
+
446.6165466308594,
|
| 35 |
+
396.2080078125,
|
| 36 |
+
378.29815673828125,
|
| 37 |
+
422.22906494140625,
|
| 38 |
+
428.92626953125,
|
| 39 |
+
396.57122802734375,
|
| 40 |
+
418.82177734375,
|
| 41 |
+
417.5713806152344,
|
| 42 |
+
407.671142578125,
|
| 43 |
+
358.8497009277344,
|
| 44 |
+
380.177734375
|
| 45 |
+
],
|
| 46 |
+
"std": [
|
| 47 |
+
974.3933715820312,
|
| 48 |
+
1035.168701171875,
|
| 49 |
+
1017.1061401367188,
|
| 50 |
+
988.4049682617188,
|
| 51 |
+
948.970703125,
|
| 52 |
+
935.0480346679688,
|
| 53 |
+
1033.254638671875,
|
| 54 |
+
984.4662475585938,
|
| 55 |
+
983.4767456054688,
|
| 56 |
+
1053.7890625,
|
| 57 |
+
990.7879638671875,
|
| 58 |
+
1052.0966796875,
|
| 59 |
+
1078.685302734375,
|
| 60 |
+
1006.2810668945312,
|
| 61 |
+
977.1021728515625,
|
| 62 |
+
1013.4202880859375,
|
| 63 |
+
1037.21826171875,
|
| 64 |
+
1076.2752685546875,
|
| 65 |
+
1000.4907836914062,
|
| 66 |
+
1046.0399169921875,
|
| 67 |
+
962.902587890625,
|
| 68 |
+
1009.9147338867188,
|
| 69 |
+
898.5845336914062,
|
| 70 |
+
981.4733276367188,
|
| 71 |
+
1012.4067993164062,
|
| 72 |
+
957.7681884765625,
|
| 73 |
+
1034.0697021484375,
|
| 74 |
+
1052.1759033203125,
|
| 75 |
+
1007.5968627929688,
|
| 76 |
+
1042.7177734375,
|
| 77 |
+
975.763427734375,
|
| 78 |
+
968.6481323242188,
|
| 79 |
+
1005.27880859375,
|
| 80 |
+
1050.577880859375,
|
| 81 |
+
1003.8203735351562,
|
| 82 |
+
1039.170166015625,
|
| 83 |
+
1029.23974609375,
|
| 84 |
+
1032.8634033203125,
|
| 85 |
+
915.1911010742188,
|
| 86 |
+
985.11279296875
|
| 87 |
+
],
|
| 88 |
+
"bootstrap_low": [
|
| 89 |
+
357.518310546875,
|
| 90 |
+
376.17254638671875,
|
| 91 |
+
383.3957214355469,
|
| 92 |
+
363.01849365234375,
|
| 93 |
+
318.067138671875,
|
| 94 |
+
334.20550537109375,
|
| 95 |
+
366.90606689453125,
|
| 96 |
+
356.07562255859375,
|
| 97 |
+
341.6206359863281,
|
| 98 |
+
375.2862548828125,
|
| 99 |
+
369.82135009765625,
|
| 100 |
+
379.7337951660156,
|
| 101 |
+
411.330078125,
|
| 102 |
+
356.6771240234375,
|
| 103 |
+
334.59295654296875,
|
| 104 |
+
367.9912414550781,
|
| 105 |
+
372.1358642578125,
|
| 106 |
+
403.588134765625,
|
| 107 |
+
349.64849853515625,
|
| 108 |
+
386.1518859863281,
|
| 109 |
+
332.25677490234375,
|
| 110 |
+
340.4033203125,
|
| 111 |
+
291.7320251464844,
|
| 112 |
+
346.8641052246094,
|
| 113 |
+
351.9197692871094,
|
| 114 |
+
317.75762939453125,
|
| 115 |
+
392.1034240722656,
|
| 116 |
+
396.23651123046875,
|
| 117 |
+
401.701416015625,
|
| 118 |
+
396.9372253417969,
|
| 119 |
+
353.0738525390625,
|
| 120 |
+
330.24176025390625,
|
| 121 |
+
375.48876953125,
|
| 122 |
+
379.8965759277344,
|
| 123 |
+
351.11651611328125,
|
| 124 |
+
373.14471435546875,
|
| 125 |
+
372.0242614746094,
|
| 126 |
+
360.08880615234375,
|
| 127 |
+
319.4432678222656,
|
| 128 |
+
335.47113037109375
|
| 129 |
+
],
|
| 130 |
+
"bootstrap_high": [
|
| 131 |
+
442.44268798828125,
|
| 132 |
+
474.25860595703125,
|
| 133 |
+
472.61029052734375,
|
| 134 |
+
453.3785095214844,
|
| 135 |
+
410.044677734375,
|
| 136 |
+
418.92401123046875,
|
| 137 |
+
464.7100524902344,
|
| 138 |
+
450.4619140625,
|
| 139 |
+
430.6653747558594,
|
| 140 |
+
468.4326171875,
|
| 141 |
+
458.427734375,
|
| 142 |
+
478.40496826171875,
|
| 143 |
+
509.2894592285156,
|
| 144 |
+
446.06488037109375,
|
| 145 |
+
422.875732421875,
|
| 146 |
+
456.1737365722656,
|
| 147 |
+
466.70245361328125,
|
| 148 |
+
509.68206787109375,
|
| 149 |
+
446.3418884277344,
|
| 150 |
+
485.0526428222656,
|
| 151 |
+
418.3226623535156,
|
| 152 |
+
435.7364807128906,
|
| 153 |
+
373.25665283203125,
|
| 154 |
+
437.79364013671875,
|
| 155 |
+
444.90496826171875,
|
| 156 |
+
406.0072326660156,
|
| 157 |
+
490.11260986328125,
|
| 158 |
+
495.37823486328125,
|
| 159 |
+
494.4657287597656,
|
| 160 |
+
495.23968505859375,
|
| 161 |
+
442.8514099121094,
|
| 162 |
+
422.07012939453125,
|
| 163 |
+
466.7059020996094,
|
| 164 |
+
477.04443359375,
|
| 165 |
+
441.86639404296875,
|
| 166 |
+
469.0641174316406,
|
| 167 |
+
463.74835205078125,
|
| 168 |
+
455.8782653808594,
|
| 169 |
+
403.5154724121094,
|
| 170 |
+
426.58843994140625
|
| 171 |
+
]
|
| 172 |
+
}
|
| 173 |
+
}
|
| 174 |
+
}
|
model_1000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dcbc3980616934e52593c1b6aabadfd1ffd311ffd58a181569f560293c10abe
|
| 3 |
+
size 89608311
|
model_2000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85e1860adaf098ccb793cc5c8714b376dffe06f12214ac2c465db0c74e748108
|
| 3 |
+
size 89608311
|
model_3000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a605d953ff08f001409b7e57014d0193ce50836757faa98828780da1189e831
|
| 3 |
+
size 89608311
|
model_4000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2580e05d77a878d6bea0b2624506b9392a327271fa09239cd76e74f1e8fb75fe
|
| 3 |
+
size 89608311
|
model_5000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c04b0dce582c2813f173859e6d97ea349eb9f36acaa61679c7c5517fa71e3662
|
| 3 |
+
size 89608311
|
model_6000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44efb4a51831dcd7b697092abfb63c218b91b4fa5a24f2b44271db868b98be04
|
| 3 |
+
size 89608311
|
model_7000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dc37cbc1d89c915b9553c036e7167b4ce00455d922ebe813056c19340d01565
|
| 3 |
+
size 89608311
|
model_8000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f04d699189f807c8989ae5ab942f7322e60a0f2bcdec0e7a0d813e8484e62a3
|
| 3 |
+
size 89608311
|
model_9000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e40e50e8c300d900a2c301dda9028e0f396409fd66fec5b374bfb52cdbc4df07
|
| 3 |
+
size 89608311
|
state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb7422d631a873855397522728d07873245675520ccbe6c76040b1ca99652b95
|
| 3 |
+
size 165713513
|