Ethan Sim committed on
Commit
7f220eb
·
1 Parent(s): dad8e42

stage big adaptified wce model

Browse files
{checkpoint-80000 → checkpoint-72000}/config.json RENAMED
File without changes
{checkpoint-80000 → checkpoint-72000}/generation_config.json RENAMED
File without changes
{checkpoint-80000 → checkpoint-72000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d526a148601e099edf158a9fc78882a39ab08130a90a882333ac7769de73029
3
  size 1845323269
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dee553ed3c07b358d14aba246ae7b15c397754d5368d73af63d4802feb85590b
3
  size 1845323269
{checkpoint-80000 → checkpoint-72000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5bb1822df70a2771409341ffb3c06e2972a8b09ee543aa179e806382e943316
3
  size 922885701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c73120ed5f0886eb579215ffef18fdc6e90ab5b7c32a765c31c5c58611905824
3
  size 922885701
{checkpoint-80000 → checkpoint-72000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1c5de039757d33c044a58924fca47bef181f366cb5e1ad6f648addfff1fd7db
3
- size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b985e593f49e2226d0de3ec98059c138ef1c127049448165d750d7ea39566354
3
+ size 14575
{checkpoint-80000 → checkpoint-72000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08ed45a31983d0daf4eea0c1495c9670548fa97b03f0a2771e3022ca5c1dd14b
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394805827a2ba904597382fb5ff73573627c5788f891ee76ba1705571f4a171b
3
  size 557
{checkpoint-80000 → checkpoint-72000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3af67343aa0f8b4109961c1ed1ad806d5b136e6b0592737a57873f77d6c9558
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22fc51666a7893fc87aad62701dce4682bace66d9f66e719993c37b1a582b131
3
  size 627
{checkpoint-80000 → checkpoint-72000}/source.spm RENAMED
File without changes
{checkpoint-80000 → checkpoint-72000}/special_tokens_map.json RENAMED
File without changes
{checkpoint-80000 → checkpoint-72000}/target.spm RENAMED
File without changes
{checkpoint-80000 → checkpoint-72000}/tokenizer_config.json RENAMED
File without changes
{checkpoint-80000 → checkpoint-72000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 43.3052,
3
- "best_model_checkpoint": "opus_big_enfr_FT_adapt_wce/checkpoint-80000",
4
- "epoch": 1.9716574245224892,
5
- "global_step": 80000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,167 +10,151 @@
10
  {
11
  "epoch": 0.2,
12
  "learning_rate": 1.9753635243376465e-05,
13
- "loss": 0.1755,
14
  "step": 8000
15
  },
16
  {
17
  "epoch": 0.2,
18
- "eval_bleu": 42.1676,
19
- "eval_gen_len": 33.629,
20
- "eval_loss": 0.09851773828268051,
21
- "eval_runtime": 118.9906,
22
- "eval_samples_per_second": 8.765,
23
- "eval_steps_per_second": 0.555,
24
  "step": 8000
25
  },
26
  {
27
  "epoch": 0.39,
28
- "learning_rate": 1.950723967960567e-05,
29
- "loss": 0.1449,
30
  "step": 16000
31
  },
32
  {
33
  "epoch": 0.39,
34
- "eval_bleu": 42.3472,
35
- "eval_gen_len": 34.117,
36
- "eval_loss": 0.09749302268028259,
37
- "eval_runtime": 122.5206,
38
- "eval_samples_per_second": 8.513,
39
- "eval_steps_per_second": 0.539,
40
  "step": 16000
41
  },
42
  {
43
  "epoch": 0.59,
44
- "learning_rate": 1.9260844115834877e-05,
45
- "loss": 0.1409,
46
  "step": 24000
47
  },
48
  {
49
  "epoch": 0.59,
50
- "eval_bleu": 42.8397,
51
- "eval_gen_len": 33.372,
52
- "eval_loss": 0.09753246605396271,
53
- "eval_runtime": 109.5101,
54
- "eval_samples_per_second": 9.524,
55
- "eval_steps_per_second": 0.603,
56
  "step": 24000
57
  },
58
  {
59
  "epoch": 0.79,
60
- "learning_rate": 1.90145101663586e-05,
61
- "loss": 0.1371,
62
  "step": 32000
63
  },
64
  {
65
  "epoch": 0.79,
66
- "eval_bleu": 42.3685,
67
- "eval_gen_len": 34.162,
68
- "eval_loss": 0.09639725089073181,
69
- "eval_runtime": 110.7923,
70
- "eval_samples_per_second": 9.414,
71
- "eval_steps_per_second": 0.596,
72
  "step": 32000
73
  },
74
  {
75
  "epoch": 0.99,
76
  "learning_rate": 1.8768114602587803e-05,
77
- "loss": 0.1357,
78
  "step": 40000
79
  },
80
  {
81
  "epoch": 0.99,
82
- "eval_bleu": 42.5122,
83
- "eval_gen_len": 34.0192,
84
- "eval_loss": 0.09595585614442825,
85
- "eval_runtime": 115.7491,
86
- "eval_samples_per_second": 9.011,
87
- "eval_steps_per_second": 0.57,
88
  "step": 40000
89
  },
90
  {
91
  "epoch": 1.18,
92
- "learning_rate": 1.8521719038817007e-05,
93
- "loss": 0.1208,
94
  "step": 48000
95
  },
96
  {
97
  "epoch": 1.18,
98
- "eval_bleu": 42.8241,
99
- "eval_gen_len": 33.8121,
100
- "eval_loss": 0.09709044545888901,
101
- "eval_runtime": 109.8769,
102
- "eval_samples_per_second": 9.492,
103
- "eval_steps_per_second": 0.601,
104
  "step": 48000
105
  },
106
  {
107
  "epoch": 1.38,
108
- "learning_rate": 1.827538508934073e-05,
109
- "loss": 0.1211,
110
  "step": 56000
111
  },
112
  {
113
  "epoch": 1.38,
114
- "eval_bleu": 42.4096,
115
- "eval_gen_len": 34.0662,
116
- "eval_loss": 0.09704224020242691,
117
- "eval_runtime": 127.711,
118
- "eval_samples_per_second": 8.167,
119
- "eval_steps_per_second": 0.517,
120
  "step": 56000
121
  },
122
  {
123
  "epoch": 1.58,
124
- "learning_rate": 1.8028989525569933e-05,
125
- "loss": 0.1217,
126
  "step": 64000
127
  },
128
  {
129
  "epoch": 1.58,
130
- "eval_bleu": 42.9752,
131
- "eval_gen_len": 34.0479,
132
- "eval_loss": 0.09673523902893066,
133
- "eval_runtime": 124.3803,
134
- "eval_samples_per_second": 8.386,
135
- "eval_steps_per_second": 0.531,
136
  "step": 64000
137
  },
138
  {
139
  "epoch": 1.77,
140
- "learning_rate": 1.7782624768946397e-05,
141
- "loss": 0.1213,
142
  "step": 72000
143
  },
144
  {
145
  "epoch": 1.77,
146
- "eval_bleu": 42.7247,
147
- "eval_gen_len": 33.9243,
148
- "eval_loss": 0.09718381613492966,
149
- "eval_runtime": 122.3128,
150
- "eval_samples_per_second": 8.527,
151
- "eval_steps_per_second": 0.54,
152
  "step": 72000
153
- },
154
- {
155
- "epoch": 1.97,
156
- "learning_rate": 1.753626001232286e-05,
157
- "loss": 0.1211,
158
- "step": 80000
159
- },
160
- {
161
- "epoch": 1.97,
162
- "eval_bleu": 43.3052,
163
- "eval_gen_len": 33.743,
164
- "eval_loss": 0.09632089734077454,
165
- "eval_runtime": 163.2845,
166
- "eval_samples_per_second": 6.388,
167
- "eval_steps_per_second": 0.404,
168
- "step": 80000
169
  }
170
  ],
171
  "max_steps": 649200,
172
  "num_train_epochs": 16,
173
- "total_flos": 2.5844916958632346e+17,
174
  "trial_name": null,
175
  "trial_params": null
176
  }
 
1
  {
2
+ "best_metric": 42.9999,
3
+ "best_model_checkpoint": "opus_big_wce_adaptified/checkpoint-72000",
4
+ "epoch": 1.7744916820702403,
5
+ "global_step": 72000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 0.2,
12
  "learning_rate": 1.9753635243376465e-05,
13
+ "loss": 0.1747,
14
  "step": 8000
15
  },
16
  {
17
  "epoch": 0.2,
18
+ "eval_bleu": 42.2846,
19
+ "eval_gen_len": 33.5896,
20
+ "eval_loss": 0.0983600914478302,
21
+ "eval_runtime": 122.9468,
22
+ "eval_samples_per_second": 8.483,
23
+ "eval_steps_per_second": 0.537,
24
  "step": 8000
25
  },
26
  {
27
  "epoch": 0.39,
28
+ "learning_rate": 1.9507208872458414e-05,
29
+ "loss": 0.1441,
30
  "step": 16000
31
  },
32
  {
33
  "epoch": 0.39,
34
+ "eval_bleu": 42.1887,
35
+ "eval_gen_len": 34.0508,
36
+ "eval_loss": 0.097293421626091,
37
+ "eval_runtime": 162.7475,
38
+ "eval_samples_per_second": 6.409,
39
+ "eval_steps_per_second": 0.406,
40
  "step": 16000
41
  },
42
  {
43
  "epoch": 0.59,
44
+ "learning_rate": 1.9260874922982132e-05,
45
+ "loss": 0.1402,
46
  "step": 24000
47
  },
48
  {
49
  "epoch": 0.59,
50
+ "eval_bleu": 42.7367,
51
+ "eval_gen_len": 33.7747,
52
+ "eval_loss": 0.09722033143043518,
53
+ "eval_runtime": 127.375,
54
+ "eval_samples_per_second": 8.188,
55
+ "eval_steps_per_second": 0.518,
56
  "step": 24000
57
  },
58
  {
59
  "epoch": 0.79,
60
+ "learning_rate": 1.901447935921134e-05,
61
+ "loss": 0.1364,
62
  "step": 32000
63
  },
64
  {
65
  "epoch": 0.79,
66
+ "eval_bleu": 42.9551,
67
+ "eval_gen_len": 34.4238,
68
+ "eval_loss": 0.0965743437409401,
69
+ "eval_runtime": 120.0033,
70
+ "eval_samples_per_second": 8.691,
71
+ "eval_steps_per_second": 0.55,
72
  "step": 32000
73
  },
74
  {
75
  "epoch": 0.99,
76
  "learning_rate": 1.8768114602587803e-05,
77
+ "loss": 0.135,
78
  "step": 40000
79
  },
80
  {
81
  "epoch": 0.99,
82
+ "eval_bleu": 42.7141,
83
+ "eval_gen_len": 34.0872,
84
+ "eval_loss": 0.09599015861749649,
85
+ "eval_runtime": 144.0409,
86
+ "eval_samples_per_second": 7.241,
87
+ "eval_steps_per_second": 0.458,
88
  "step": 40000
89
  },
90
  {
91
  "epoch": 1.18,
92
+ "learning_rate": 1.8521749845964266e-05,
93
+ "loss": 0.1203,
94
  "step": 48000
95
  },
96
  {
97
  "epoch": 1.18,
98
+ "eval_bleu": 42.4802,
99
+ "eval_gen_len": 34.1266,
100
+ "eval_loss": 0.09718813002109528,
101
+ "eval_runtime": 121.2752,
102
+ "eval_samples_per_second": 8.6,
103
+ "eval_steps_per_second": 0.544,
104
  "step": 48000
105
  },
106
  {
107
  "epoch": 1.38,
108
+ "learning_rate": 1.827535428219347e-05,
109
+ "loss": 0.1205,
110
  "step": 56000
111
  },
112
  {
113
  "epoch": 1.38,
114
+ "eval_bleu": 42.6361,
115
+ "eval_gen_len": 34.1045,
116
+ "eval_loss": 0.0969875305891037,
117
+ "eval_runtime": 137.2112,
118
+ "eval_samples_per_second": 7.601,
119
+ "eval_steps_per_second": 0.481,
120
  "step": 56000
121
  },
122
  {
123
  "epoch": 1.58,
124
+ "learning_rate": 1.8029020332717192e-05,
125
+ "loss": 0.1211,
126
  "step": 64000
127
  },
128
  {
129
  "epoch": 1.58,
130
+ "eval_bleu": 42.8271,
131
+ "eval_gen_len": 34.4851,
132
+ "eval_loss": 0.09680665284395218,
133
+ "eval_runtime": 134.8615,
134
+ "eval_samples_per_second": 7.734,
135
+ "eval_steps_per_second": 0.489,
136
  "step": 64000
137
  },
138
  {
139
  "epoch": 1.77,
140
+ "learning_rate": 1.7782655576093655e-05,
141
+ "loss": 0.1207,
142
  "step": 72000
143
  },
144
  {
145
  "epoch": 1.77,
146
+ "eval_bleu": 42.9999,
147
+ "eval_gen_len": 34.0671,
148
+ "eval_loss": 0.09675087034702301,
149
+ "eval_runtime": 110.3226,
150
+ "eval_samples_per_second": 9.454,
151
+ "eval_steps_per_second": 0.598,
152
  "step": 72000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  }
154
  ],
155
  "max_steps": 649200,
156
  "num_train_epochs": 16,
157
+ "total_flos": 2.3265115550672486e+17,
158
  "trial_name": null,
159
  "trial_params": null
160
  }
{checkpoint-80000 → checkpoint-72000}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c3be397238354cb3b7021722caa2999c3a8a27790d6ef14c14cfd7d38f7afb9
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cf5ee38f2194704c244e5c80610b2c3775d2eb9a6a936f61d49a9795e266a86
3
  size 3771
{checkpoint-80000 → checkpoint-72000}/vocab.json RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5bb1822df70a2771409341ffb3c06e2972a8b09ee543aa179e806382e943316
3
  size 922885701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c73120ed5f0886eb579215ffef18fdc6e90ab5b7c32a765c31c5c58611905824
3
  size 922885701
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c3be397238354cb3b7021722caa2999c3a8a27790d6ef14c14cfd7d38f7afb9
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cf5ee38f2194704c244e5c80610b2c3775d2eb9a6a936f61d49a9795e266a86
3
  size 3771