MelMitchell8 commited on
Commit
3289f63
·
1 Parent(s): f7ae1ca

Upload 37 files

Browse files
Files changed (37) hide show
  1. experiments/.DS_Store +0 -0
  2. experiments/adapter_config.json +20 -0
  3. experiments/adapter_model.bin +3 -0
  4. experiments/checkpoint-200/optimizer.pt +3 -0
  5. experiments/checkpoint-200/pytorch_model.bin +3 -0
  6. experiments/checkpoint-200/rng_state.pth +3 -0
  7. experiments/checkpoint-200/scheduler.pt +3 -0
  8. experiments/checkpoint-200/trainer_state.json +168 -0
  9. experiments/checkpoint-200/training_args.bin +3 -0
  10. experiments/checkpoint-250/optimizer.pt +3 -0
  11. experiments/checkpoint-250/pytorch_model.bin +3 -0
  12. experiments/checkpoint-250/rng_state.pth +3 -0
  13. experiments/checkpoint-250/scheduler.pt +3 -0
  14. experiments/checkpoint-250/trainer_state.json +206 -0
  15. experiments/checkpoint-250/training_args.bin +3 -0
  16. experiments/checkpoint-300/optimizer.pt +3 -0
  17. experiments/checkpoint-300/pytorch_model.bin +3 -0
  18. experiments/checkpoint-300/rng_state.pth +3 -0
  19. experiments/checkpoint-300/scheduler.pt +3 -0
  20. experiments/checkpoint-300/trainer_state.json +244 -0
  21. experiments/checkpoint-300/training_args.bin +3 -0
  22. experiments/runs/Jun01_05-20-38_342e8c5b39dd/1685596839.0285306/events.out.tfevents.1685596839.342e8c5b39dd.10274.1 +3 -0
  23. experiments/runs/Jun01_05-20-38_342e8c5b39dd/1685596845.6246781/events.out.tfevents.1685596845.342e8c5b39dd.10274.3 +3 -0
  24. experiments/runs/Jun01_05-20-38_342e8c5b39dd/events.out.tfevents.1685596839.342e8c5b39dd.10274.0 +3 -0
  25. experiments/runs/Jun01_05-20-38_342e8c5b39dd/events.out.tfevents.1685596845.342e8c5b39dd.10274.2 +3 -0
  26. experiments/runs/Jun01_05-25-44_342e8c5b39dd/1685597145.1742842/events.out.tfevents.1685597145.342e8c5b39dd.13098.1 +3 -0
  27. experiments/runs/Jun01_05-25-44_342e8c5b39dd/events.out.tfevents.1685597145.342e8c5b39dd.13098.0 +3 -0
  28. experiments/runs/Jun01_07-13-15_090d8d4aa7e2/1685603596.0617163/events.out.tfevents.1685603596.090d8d4aa7e2.1898.1 +3 -0
  29. experiments/runs/Jun01_07-13-15_090d8d4aa7e2/events.out.tfevents.1685603596.090d8d4aa7e2.1898.0 +3 -0
  30. experiments/runs/Jun01_08-58-37_11b0426da60f/1685609918.3124719/events.out.tfevents.1685609918.11b0426da60f.611.1 +3 -0
  31. experiments/runs/Jun01_08-58-37_11b0426da60f/events.out.tfevents.1685609918.11b0426da60f.611.0 +3 -0
  32. experiments/runs/Jun01_09-00-06_05555eff8aef/1685610006.9899611/events.out.tfevents.1685610007.05555eff8aef.2638.1 +3 -0
  33. experiments/runs/Jun01_09-00-06_05555eff8aef/events.out.tfevents.1685610006.05555eff8aef.2638.0 +3 -0
  34. experiments/runs/Jun01_16-23-40_05555eff8aef/1685636620.8436341/events.out.tfevents.1685636620.05555eff8aef.113292.1 +3 -0
  35. experiments/runs/Jun01_16-23-40_05555eff8aef/events.out.tfevents.1685636620.05555eff8aef.113292.0 +3 -0
  36. experiments/runs/Jun01_19-42-51_d1618e311822/1685648572.7311761/events.out.tfevents.1685648572.d1618e311822.5586.1 +3 -0
  37. experiments/runs/Jun01_19-42-51_d1618e311822/events.out.tfevents.1685648572.d1618e311822.5586.0 +3 -0
experiments/.DS_Store ADDED
Binary file (6.15 kB). View file
 
experiments/adapter_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "decapoda-research/llama-7b-hf",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "layers_pattern": null,
8
+ "layers_to_transform": null,
9
+ "lora_alpha": 16,
10
+ "lora_dropout": 0.05,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "revision": null,
15
+ "target_modules": [
16
+ "q_proj",
17
+ "v_proj"
18
+ ],
19
+ "task_type": "CAUSAL_LM"
20
+ }
experiments/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
3
+ size 443
experiments/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a3eb06a66b599a0807a07ddfa32489b8c0cf87fa0eea6db3d52000561ff6ed6
3
+ size 33661637
experiments/checkpoint-200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d0f8e7b279fab15d49443678eccd6482012c33bb76f6d9e6ba28035e113200e
3
+ size 16822989
experiments/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:632fe9e673aa097636302f655e7ca5746c01c4d73d7117950cc839f7c5530c88
3
+ size 14575
experiments/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:070d5feb292b6cdd1e6d789531daa5537e6f20b2480db363c1778451c80883ab
3
+ size 627
experiments/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6030644774436951,
3
+ "best_model_checkpoint": "experiments/checkpoint-200",
4
+ "epoch": 0.311284046692607,
5
+ "global_step": 200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 2.9999999999999997e-05,
13
+ "loss": 2.5075,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 5.6999999999999996e-05,
19
+ "loss": 2.3737,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.05,
24
+ "learning_rate": 8.699999999999999e-05,
25
+ "loss": 2.0938,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.06,
30
+ "learning_rate": 0.00011399999999999999,
31
+ "loss": 1.4761,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.08,
36
+ "learning_rate": 0.00014399999999999998,
37
+ "loss": 1.0161,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.08,
42
+ "eval_loss": 0.9799935817718506,
43
+ "eval_runtime": 9.5951,
44
+ "eval_samples_per_second": 20.844,
45
+ "eval_steps_per_second": 2.605,
46
+ "step": 50
47
+ },
48
+ {
49
+ "epoch": 0.09,
50
+ "learning_rate": 0.00017399999999999997,
51
+ "loss": 0.9253,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.11,
56
+ "learning_rate": 0.000204,
57
+ "loss": 0.8183,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.12,
62
+ "learning_rate": 0.000234,
63
+ "loss": 0.7652,
64
+ "step": 80
65
+ },
66
+ {
67
+ "epoch": 0.14,
68
+ "learning_rate": 0.00026399999999999997,
69
+ "loss": 0.7233,
70
+ "step": 90
71
+ },
72
+ {
73
+ "epoch": 0.16,
74
+ "learning_rate": 0.000294,
75
+ "loss": 0.7036,
76
+ "step": 100
77
+ },
78
+ {
79
+ "epoch": 0.16,
80
+ "eval_loss": 0.7070202827453613,
81
+ "eval_runtime": 9.6202,
82
+ "eval_samples_per_second": 20.79,
83
+ "eval_steps_per_second": 2.599,
84
+ "step": 100
85
+ },
86
+ {
87
+ "epoch": 0.17,
88
+ "learning_rate": 0.00028799999999999995,
89
+ "loss": 0.6666,
90
+ "step": 110
91
+ },
92
+ {
93
+ "epoch": 0.19,
94
+ "learning_rate": 0.00027299999999999997,
95
+ "loss": 0.6375,
96
+ "step": 120
97
+ },
98
+ {
99
+ "epoch": 0.2,
100
+ "learning_rate": 0.000258,
101
+ "loss": 0.6207,
102
+ "step": 130
103
+ },
104
+ {
105
+ "epoch": 0.22,
106
+ "learning_rate": 0.000243,
107
+ "loss": 0.6246,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.23,
112
+ "learning_rate": 0.00022799999999999999,
113
+ "loss": 0.6133,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.23,
118
+ "eval_loss": 0.6191394925117493,
119
+ "eval_runtime": 9.5788,
120
+ "eval_samples_per_second": 20.879,
121
+ "eval_steps_per_second": 2.61,
122
+ "step": 150
123
+ },
124
+ {
125
+ "epoch": 0.25,
126
+ "learning_rate": 0.00021299999999999997,
127
+ "loss": 0.6044,
128
+ "step": 160
129
+ },
130
+ {
131
+ "epoch": 0.26,
132
+ "learning_rate": 0.000198,
133
+ "loss": 0.6098,
134
+ "step": 170
135
+ },
136
+ {
137
+ "epoch": 0.28,
138
+ "learning_rate": 0.00018299999999999998,
139
+ "loss": 0.6011,
140
+ "step": 180
141
+ },
142
+ {
143
+ "epoch": 0.3,
144
+ "learning_rate": 0.000168,
145
+ "loss": 0.594,
146
+ "step": 190
147
+ },
148
+ {
149
+ "epoch": 0.31,
150
+ "learning_rate": 0.00015299999999999998,
151
+ "loss": 0.6002,
152
+ "step": 200
153
+ },
154
+ {
155
+ "epoch": 0.31,
156
+ "eval_loss": 0.6030644774436951,
157
+ "eval_runtime": 9.6172,
158
+ "eval_samples_per_second": 20.796,
159
+ "eval_steps_per_second": 2.6,
160
+ "step": 200
161
+ }
162
+ ],
163
+ "max_steps": 300,
164
+ "num_train_epochs": 1,
165
+ "total_flos": 1.7718117662588928e+17,
166
+ "trial_name": null,
167
+ "trial_params": null
168
+ }
experiments/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be4ec138b6339576c00fef284fd852b5850365d5171336195581652280fa859d
3
+ size 3899
experiments/checkpoint-250/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f33867a68b34b68aee6b7e327a6c5ae88a958b71d6df154c4b00b7572f1a1c0
3
+ size 33661637
experiments/checkpoint-250/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c67dfd06a2b81ab91049f6fc5c7d34345e3923ab46ff47060ab4ceb2458b5275
3
+ size 16822989
experiments/checkpoint-250/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0dbc1ef06a8bc675ce99c64e3cfe20966be764652d80828d89f3b5ce43fcc6
3
+ size 14575
experiments/checkpoint-250/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba7862dfae2e2b45b88440253138cc9d2ebd0c12a903e1b4d46e6141f5a3a5e
3
+ size 627
experiments/checkpoint-250/trainer_state.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.594174325466156,
3
+ "best_model_checkpoint": "experiments/checkpoint-250",
4
+ "epoch": 0.38910505836575876,
5
+ "global_step": 250,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 2.9999999999999997e-05,
13
+ "loss": 2.5075,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 5.6999999999999996e-05,
19
+ "loss": 2.3737,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.05,
24
+ "learning_rate": 8.699999999999999e-05,
25
+ "loss": 2.0938,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.06,
30
+ "learning_rate": 0.00011399999999999999,
31
+ "loss": 1.4761,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.08,
36
+ "learning_rate": 0.00014399999999999998,
37
+ "loss": 1.0161,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.08,
42
+ "eval_loss": 0.9799935817718506,
43
+ "eval_runtime": 9.5951,
44
+ "eval_samples_per_second": 20.844,
45
+ "eval_steps_per_second": 2.605,
46
+ "step": 50
47
+ },
48
+ {
49
+ "epoch": 0.09,
50
+ "learning_rate": 0.00017399999999999997,
51
+ "loss": 0.9253,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.11,
56
+ "learning_rate": 0.000204,
57
+ "loss": 0.8183,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.12,
62
+ "learning_rate": 0.000234,
63
+ "loss": 0.7652,
64
+ "step": 80
65
+ },
66
+ {
67
+ "epoch": 0.14,
68
+ "learning_rate": 0.00026399999999999997,
69
+ "loss": 0.7233,
70
+ "step": 90
71
+ },
72
+ {
73
+ "epoch": 0.16,
74
+ "learning_rate": 0.000294,
75
+ "loss": 0.7036,
76
+ "step": 100
77
+ },
78
+ {
79
+ "epoch": 0.16,
80
+ "eval_loss": 0.7070202827453613,
81
+ "eval_runtime": 9.6202,
82
+ "eval_samples_per_second": 20.79,
83
+ "eval_steps_per_second": 2.599,
84
+ "step": 100
85
+ },
86
+ {
87
+ "epoch": 0.17,
88
+ "learning_rate": 0.00028799999999999995,
89
+ "loss": 0.6666,
90
+ "step": 110
91
+ },
92
+ {
93
+ "epoch": 0.19,
94
+ "learning_rate": 0.00027299999999999997,
95
+ "loss": 0.6375,
96
+ "step": 120
97
+ },
98
+ {
99
+ "epoch": 0.2,
100
+ "learning_rate": 0.000258,
101
+ "loss": 0.6207,
102
+ "step": 130
103
+ },
104
+ {
105
+ "epoch": 0.22,
106
+ "learning_rate": 0.000243,
107
+ "loss": 0.6246,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.23,
112
+ "learning_rate": 0.00022799999999999999,
113
+ "loss": 0.6133,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.23,
118
+ "eval_loss": 0.6191394925117493,
119
+ "eval_runtime": 9.5788,
120
+ "eval_samples_per_second": 20.879,
121
+ "eval_steps_per_second": 2.61,
122
+ "step": 150
123
+ },
124
+ {
125
+ "epoch": 0.25,
126
+ "learning_rate": 0.00021299999999999997,
127
+ "loss": 0.6044,
128
+ "step": 160
129
+ },
130
+ {
131
+ "epoch": 0.26,
132
+ "learning_rate": 0.000198,
133
+ "loss": 0.6098,
134
+ "step": 170
135
+ },
136
+ {
137
+ "epoch": 0.28,
138
+ "learning_rate": 0.00018299999999999998,
139
+ "loss": 0.6011,
140
+ "step": 180
141
+ },
142
+ {
143
+ "epoch": 0.3,
144
+ "learning_rate": 0.000168,
145
+ "loss": 0.594,
146
+ "step": 190
147
+ },
148
+ {
149
+ "epoch": 0.31,
150
+ "learning_rate": 0.00015299999999999998,
151
+ "loss": 0.6002,
152
+ "step": 200
153
+ },
154
+ {
155
+ "epoch": 0.31,
156
+ "eval_loss": 0.6030644774436951,
157
+ "eval_runtime": 9.6172,
158
+ "eval_samples_per_second": 20.796,
159
+ "eval_steps_per_second": 2.6,
160
+ "step": 200
161
+ },
162
+ {
163
+ "epoch": 0.33,
164
+ "learning_rate": 0.000138,
165
+ "loss": 0.5836,
166
+ "step": 210
167
+ },
168
+ {
169
+ "epoch": 0.34,
170
+ "learning_rate": 0.00012299999999999998,
171
+ "loss": 0.5911,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 0.36,
176
+ "learning_rate": 0.00010799999999999998,
177
+ "loss": 0.5946,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.37,
182
+ "learning_rate": 9.3e-05,
183
+ "loss": 0.5904,
184
+ "step": 240
185
+ },
186
+ {
187
+ "epoch": 0.39,
188
+ "learning_rate": 7.8e-05,
189
+ "loss": 0.5853,
190
+ "step": 250
191
+ },
192
+ {
193
+ "epoch": 0.39,
194
+ "eval_loss": 0.594174325466156,
195
+ "eval_runtime": 9.5893,
196
+ "eval_samples_per_second": 20.857,
197
+ "eval_steps_per_second": 2.607,
198
+ "step": 250
199
+ }
200
+ ],
201
+ "max_steps": 300,
202
+ "num_train_epochs": 1,
203
+ "total_flos": 2.2062818402107392e+17,
204
+ "trial_name": null,
205
+ "trial_params": null
206
+ }
experiments/checkpoint-250/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be4ec138b6339576c00fef284fd852b5850365d5171336195581652280fa859d
3
+ size 3899
experiments/checkpoint-300/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c6f1c3758082520d940b854c2d25b58a3f83aa3330aa9001e9d83434d2b7634
3
+ size 33661637
experiments/checkpoint-300/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e54e3c5b13d55e55821cb7c88f704123c5de2c7617cd137d740f1ed06b80e2df
3
+ size 16822989
experiments/checkpoint-300/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5f3a418c95e7400992c8f8e6e47e0843d0dfa05067f1d650c66a3a05f3f09a
3
+ size 14575
experiments/checkpoint-300/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473957f40dfb57902efb831c2282a51b3b67e7025fd09020b4382671aa15b281
3
+ size 627
experiments/checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5911636352539062,
3
+ "best_model_checkpoint": "experiments/checkpoint-300",
4
+ "epoch": 0.4669260700389105,
5
+ "global_step": 300,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 2.9999999999999997e-05,
13
+ "loss": 2.5075,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 5.6999999999999996e-05,
19
+ "loss": 2.3737,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.05,
24
+ "learning_rate": 8.699999999999999e-05,
25
+ "loss": 2.0938,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.06,
30
+ "learning_rate": 0.00011399999999999999,
31
+ "loss": 1.4761,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.08,
36
+ "learning_rate": 0.00014399999999999998,
37
+ "loss": 1.0161,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.08,
42
+ "eval_loss": 0.9799935817718506,
43
+ "eval_runtime": 9.5951,
44
+ "eval_samples_per_second": 20.844,
45
+ "eval_steps_per_second": 2.605,
46
+ "step": 50
47
+ },
48
+ {
49
+ "epoch": 0.09,
50
+ "learning_rate": 0.00017399999999999997,
51
+ "loss": 0.9253,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.11,
56
+ "learning_rate": 0.000204,
57
+ "loss": 0.8183,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.12,
62
+ "learning_rate": 0.000234,
63
+ "loss": 0.7652,
64
+ "step": 80
65
+ },
66
+ {
67
+ "epoch": 0.14,
68
+ "learning_rate": 0.00026399999999999997,
69
+ "loss": 0.7233,
70
+ "step": 90
71
+ },
72
+ {
73
+ "epoch": 0.16,
74
+ "learning_rate": 0.000294,
75
+ "loss": 0.7036,
76
+ "step": 100
77
+ },
78
+ {
79
+ "epoch": 0.16,
80
+ "eval_loss": 0.7070202827453613,
81
+ "eval_runtime": 9.6202,
82
+ "eval_samples_per_second": 20.79,
83
+ "eval_steps_per_second": 2.599,
84
+ "step": 100
85
+ },
86
+ {
87
+ "epoch": 0.17,
88
+ "learning_rate": 0.00028799999999999995,
89
+ "loss": 0.6666,
90
+ "step": 110
91
+ },
92
+ {
93
+ "epoch": 0.19,
94
+ "learning_rate": 0.00027299999999999997,
95
+ "loss": 0.6375,
96
+ "step": 120
97
+ },
98
+ {
99
+ "epoch": 0.2,
100
+ "learning_rate": 0.000258,
101
+ "loss": 0.6207,
102
+ "step": 130
103
+ },
104
+ {
105
+ "epoch": 0.22,
106
+ "learning_rate": 0.000243,
107
+ "loss": 0.6246,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.23,
112
+ "learning_rate": 0.00022799999999999999,
113
+ "loss": 0.6133,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.23,
118
+ "eval_loss": 0.6191394925117493,
119
+ "eval_runtime": 9.5788,
120
+ "eval_samples_per_second": 20.879,
121
+ "eval_steps_per_second": 2.61,
122
+ "step": 150
123
+ },
124
+ {
125
+ "epoch": 0.25,
126
+ "learning_rate": 0.00021299999999999997,
127
+ "loss": 0.6044,
128
+ "step": 160
129
+ },
130
+ {
131
+ "epoch": 0.26,
132
+ "learning_rate": 0.000198,
133
+ "loss": 0.6098,
134
+ "step": 170
135
+ },
136
+ {
137
+ "epoch": 0.28,
138
+ "learning_rate": 0.00018299999999999998,
139
+ "loss": 0.6011,
140
+ "step": 180
141
+ },
142
+ {
143
+ "epoch": 0.3,
144
+ "learning_rate": 0.000168,
145
+ "loss": 0.594,
146
+ "step": 190
147
+ },
148
+ {
149
+ "epoch": 0.31,
150
+ "learning_rate": 0.00015299999999999998,
151
+ "loss": 0.6002,
152
+ "step": 200
153
+ },
154
+ {
155
+ "epoch": 0.31,
156
+ "eval_loss": 0.6030644774436951,
157
+ "eval_runtime": 9.6172,
158
+ "eval_samples_per_second": 20.796,
159
+ "eval_steps_per_second": 2.6,
160
+ "step": 200
161
+ },
162
+ {
163
+ "epoch": 0.33,
164
+ "learning_rate": 0.000138,
165
+ "loss": 0.5836,
166
+ "step": 210
167
+ },
168
+ {
169
+ "epoch": 0.34,
170
+ "learning_rate": 0.00012299999999999998,
171
+ "loss": 0.5911,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 0.36,
176
+ "learning_rate": 0.00010799999999999998,
177
+ "loss": 0.5946,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.37,
182
+ "learning_rate": 9.3e-05,
183
+ "loss": 0.5904,
184
+ "step": 240
185
+ },
186
+ {
187
+ "epoch": 0.39,
188
+ "learning_rate": 7.8e-05,
189
+ "loss": 0.5853,
190
+ "step": 250
191
+ },
192
+ {
193
+ "epoch": 0.39,
194
+ "eval_loss": 0.594174325466156,
195
+ "eval_runtime": 9.5893,
196
+ "eval_samples_per_second": 20.857,
197
+ "eval_steps_per_second": 2.607,
198
+ "step": 250
199
+ },
200
+ {
201
+ "epoch": 0.4,
202
+ "learning_rate": 6.299999999999999e-05,
203
+ "loss": 0.5778,
204
+ "step": 260
205
+ },
206
+ {
207
+ "epoch": 0.42,
208
+ "learning_rate": 4.7999999999999994e-05,
209
+ "loss": 0.5837,
210
+ "step": 270
211
+ },
212
+ {
213
+ "epoch": 0.44,
214
+ "learning_rate": 3.2999999999999996e-05,
215
+ "loss": 0.574,
216
+ "step": 280
217
+ },
218
+ {
219
+ "epoch": 0.45,
220
+ "learning_rate": 1.7999999999999997e-05,
221
+ "loss": 0.5868,
222
+ "step": 290
223
+ },
224
+ {
225
+ "epoch": 0.47,
226
+ "learning_rate": 2.9999999999999997e-06,
227
+ "loss": 0.5822,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 0.47,
232
+ "eval_loss": 0.5911636352539062,
233
+ "eval_runtime": 9.6181,
234
+ "eval_samples_per_second": 20.794,
235
+ "eval_steps_per_second": 2.599,
236
+ "step": 300
237
+ }
238
+ ],
239
+ "max_steps": 300,
240
+ "num_train_epochs": 1,
241
+ "total_flos": 2.6489142544171008e+17,
242
+ "trial_name": null,
243
+ "trial_params": null
244
+ }
experiments/checkpoint-300/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be4ec138b6339576c00fef284fd852b5850365d5171336195581652280fa859d
3
+ size 3899
experiments/runs/Jun01_05-20-38_342e8c5b39dd/1685596839.0285306/events.out.tfevents.1685596839.342e8c5b39dd.10274.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3aecdc3e3b0d4e81dfaa732e0f498a36283db55116e45fcb10636da1a26a575
3
+ size 5868
experiments/runs/Jun01_05-20-38_342e8c5b39dd/1685596845.6246781/events.out.tfevents.1685596845.342e8c5b39dd.10274.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67bf0346552d0687fb392df5135c1c8c1351735c1ca7d4757882bb2ea02ff116
3
+ size 5868
experiments/runs/Jun01_05-20-38_342e8c5b39dd/events.out.tfevents.1685596839.342e8c5b39dd.10274.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0813a51e29de49d6778894f3d027678813974276862fed2ea3ff9fa4697a1714
3
+ size 88
experiments/runs/Jun01_05-20-38_342e8c5b39dd/events.out.tfevents.1685596845.342e8c5b39dd.10274.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:526c807cec22525b86393df7a49b59bb9531443e9c745abed2bfaf259f1077a3
3
+ size 88
experiments/runs/Jun01_05-25-44_342e8c5b39dd/1685597145.1742842/events.out.tfevents.1685597145.342e8c5b39dd.13098.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77bab4797108f90d14935eca73e7d6d31d69226cabaa36e1ba09748d5f017aa
3
+ size 5904
experiments/runs/Jun01_05-25-44_342e8c5b39dd/events.out.tfevents.1685597145.342e8c5b39dd.13098.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82349c31bce8248fce0a6bdb56f0da8debc0aa713e19594f941a4572920a1ffc
3
+ size 6679
experiments/runs/Jun01_07-13-15_090d8d4aa7e2/1685603596.0617163/events.out.tfevents.1685603596.090d8d4aa7e2.1898.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9817e1e0a23e8b8e6123cd3f6f33960e34a4be8de098d5a16dc42a805296b5
3
+ size 5904
experiments/runs/Jun01_07-13-15_090d8d4aa7e2/events.out.tfevents.1685603596.090d8d4aa7e2.1898.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a97d11864f539660db2b76acd30977a428652fe8f181c64c074ed6078c5e1a4f
3
+ size 4915
experiments/runs/Jun01_08-58-37_11b0426da60f/1685609918.3124719/events.out.tfevents.1685609918.11b0426da60f.611.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14772179c98698c1691a1150f1d29078e402f4357c57ccbd34873c66eac60f05
3
+ size 5904
experiments/runs/Jun01_08-58-37_11b0426da60f/events.out.tfevents.1685609918.11b0426da60f.611.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30acb00ba89945ec3ff9b0fd151540d8bd6c9241038b69deea7bfe25c36451a3
3
+ size 10943
experiments/runs/Jun01_09-00-06_05555eff8aef/1685610006.9899611/events.out.tfevents.1685610007.05555eff8aef.2638.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45748c8c802b3454a18abecf8ef3f422519bea4ad417df8393e1ef54bedf9513
3
+ size 5904
experiments/runs/Jun01_09-00-06_05555eff8aef/events.out.tfevents.1685610006.05555eff8aef.2638.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4519824f00a804671603fa3dcd76c1fe6b948088bc3b45207c49f31bd38c06de
3
+ size 7892
experiments/runs/Jun01_16-23-40_05555eff8aef/1685636620.8436341/events.out.tfevents.1685636620.05555eff8aef.113292.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccf0908097aeaad223511b32b0ab7ac61aabebad86f1ee61ff1d3d19fbdcee50
3
+ size 5904
experiments/runs/Jun01_16-23-40_05555eff8aef/events.out.tfevents.1685636620.05555eff8aef.113292.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3d8da01e8a529e5da4c5a442486dfcfea0b6a2e9ce1ba37b83580f96dcf8b79
3
+ size 4761
experiments/runs/Jun01_19-42-51_d1618e311822/1685648572.7311761/events.out.tfevents.1685648572.d1618e311822.5586.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291c88c22ebf7db54515c4236949550e1b01087d8ac8b884aa3426b8cb7c0428
3
+ size 5904
experiments/runs/Jun01_19-42-51_d1618e311822/events.out.tfevents.1685648572.d1618e311822.5586.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:913581ad368589620dd81f221d235192b5e1456e3cd238c42a303b5b8c68a47d
3
+ size 10943