dq158 committed on
Commit
3201ed5
·
1 Parent(s): a8fe027

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -56,7 +56,7 @@
56
  },
57
  "tie_word_embeddings": false,
58
  "torch_dtype": "float32",
59
- "transformers_version": "4.34.0",
60
  "use_cache": true,
61
  "vocab_size": 32128
62
  }
 
56
  },
57
  "tie_word_embeddings": false,
58
  "torch_dtype": "float32",
59
+ "transformers_version": "4.34.1",
60
  "use_cache": true,
61
  "vocab_size": 32128
62
  }
last-checkpoint/generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
- "transformers_version": "4.34.0"
6
  }
 
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
+ "transformers_version": "4.34.1"
6
  }
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edbf65f6f6b2d902ca192fe35b3efd93add0d803274b4def7bd26dee546982fe
3
- size 1980860410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc5f7966ab751e227a273a7acdbf9909b12ac56502470ca806ef6c19af692daa
3
+ size 1832
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9519ad1f203ec112bc005ea3f5aacffca3d057ed940f03bc40440539df41b908
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edda3e180dc7f3b528ecce2ee7125d509e7bce9dda2b822c296040dda25123dc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4166a33b5520e0dfb0734abb46fce8eedf9514c1a2270d14609538201a3fae48
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1860c7c6ca1e473d03a7cae1a98bbfbcae9f1a1ba246b538abe04ba78364f93
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,198 +1,38 @@
1
  {
2
- "best_metric": 1.4382692575454712,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-7375",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 7375,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.34,
13
- "learning_rate": 4.6610169491525425e-05,
14
- "loss": 1.8922,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 0.68,
19
- "learning_rate": 4.3220338983050854e-05,
20
- "loss": 1.7522,
21
- "step": 1000
22
- },
23
  {
24
  "epoch": 1.0,
25
  "eval_bleu": 1.0,
26
  "eval_brevity_penalty": 1.0,
27
  "eval_length_ratio": 1.0,
28
- "eval_loss": 1.4803038835525513,
29
- "eval_precisions": [
30
- 1.0,
31
- 1.0,
32
- 1.0,
33
- 1.0
34
- ],
35
- "eval_reference_length": 35996,
36
- "eval_runtime": 295.5668,
37
- "eval_samples_per_second": 6.652,
38
- "eval_steps_per_second": 0.555,
39
- "eval_translation_length": 35996,
40
- "step": 1475
41
- },
42
- {
43
- "epoch": 1.02,
44
- "learning_rate": 3.983050847457627e-05,
45
- "loss": 1.7275,
46
- "step": 1500
47
- },
48
- {
49
- "epoch": 1.36,
50
- "learning_rate": 3.644067796610169e-05,
51
- "loss": 1.6924,
52
- "step": 2000
53
- },
54
- {
55
- "epoch": 1.69,
56
- "learning_rate": 3.305084745762712e-05,
57
- "loss": 1.6815,
58
- "step": 2500
59
- },
60
- {
61
- "epoch": 2.0,
62
- "eval_bleu": 1.0,
63
- "eval_brevity_penalty": 1.0,
64
- "eval_length_ratio": 1.0,
65
- "eval_loss": 1.4565129280090332,
66
- "eval_precisions": [
67
- 1.0,
68
- 1.0,
69
- 1.0,
70
- 1.0
71
- ],
72
- "eval_reference_length": 35935,
73
- "eval_runtime": 291.4973,
74
- "eval_samples_per_second": 6.744,
75
- "eval_steps_per_second": 0.563,
76
- "eval_translation_length": 35935,
77
- "step": 2950
78
- },
79
- {
80
- "epoch": 2.03,
81
- "learning_rate": 2.9661016949152544e-05,
82
- "loss": 1.6543,
83
- "step": 3000
84
- },
85
- {
86
- "epoch": 2.37,
87
- "learning_rate": 2.627118644067797e-05,
88
- "loss": 1.6515,
89
- "step": 3500
90
- },
91
- {
92
- "epoch": 2.71,
93
- "learning_rate": 2.2881355932203392e-05,
94
- "loss": 1.6408,
95
- "step": 4000
96
- },
97
- {
98
- "epoch": 3.0,
99
- "eval_bleu": 1.0,
100
- "eval_brevity_penalty": 1.0,
101
- "eval_length_ratio": 1.0,
102
- "eval_loss": 1.446006178855896,
103
- "eval_precisions": [
104
- 1.0,
105
- 1.0,
106
- 1.0,
107
- 1.0
108
- ],
109
- "eval_reference_length": 36005,
110
- "eval_runtime": 291.2917,
111
- "eval_samples_per_second": 6.749,
112
- "eval_steps_per_second": 0.563,
113
- "eval_translation_length": 36005,
114
- "step": 4425
115
- },
116
- {
117
- "epoch": 3.05,
118
- "learning_rate": 1.9491525423728814e-05,
119
- "loss": 1.6183,
120
- "step": 4500
121
- },
122
- {
123
- "epoch": 3.39,
124
- "learning_rate": 1.6101694915254237e-05,
125
- "loss": 1.6291,
126
- "step": 5000
127
- },
128
- {
129
- "epoch": 3.73,
130
- "learning_rate": 1.2711864406779661e-05,
131
- "loss": 1.6058,
132
- "step": 5500
133
- },
134
- {
135
- "epoch": 4.0,
136
- "eval_bleu": 1.0,
137
- "eval_brevity_penalty": 1.0,
138
- "eval_length_ratio": 1.0,
139
- "eval_loss": 1.4407896995544434,
140
- "eval_precisions": [
141
- 1.0,
142
- 1.0,
143
- 1.0,
144
- 1.0
145
- ],
146
- "eval_reference_length": 36007,
147
- "eval_runtime": 291.6745,
148
- "eval_samples_per_second": 6.74,
149
- "eval_steps_per_second": 0.562,
150
- "eval_translation_length": 36007,
151
- "step": 5900
152
- },
153
- {
154
- "epoch": 4.07,
155
- "learning_rate": 9.322033898305085e-06,
156
- "loss": 1.6139,
157
- "step": 6000
158
- },
159
- {
160
- "epoch": 4.41,
161
- "learning_rate": 5.932203389830509e-06,
162
- "loss": 1.5939,
163
- "step": 6500
164
- },
165
- {
166
- "epoch": 4.75,
167
- "learning_rate": 2.5423728813559323e-06,
168
- "loss": 1.5956,
169
- "step": 7000
170
- },
171
- {
172
- "epoch": 5.0,
173
- "eval_bleu": 1.0,
174
- "eval_brevity_penalty": 1.0,
175
- "eval_length_ratio": 1.0,
176
- "eval_loss": 1.4382692575454712,
177
  "eval_precisions": [
178
  1.0,
179
  1.0,
180
  1.0,
181
  1.0
182
  ],
183
- "eval_reference_length": 36000,
184
- "eval_runtime": 292.0466,
185
- "eval_samples_per_second": 6.732,
186
- "eval_steps_per_second": 0.562,
187
- "eval_translation_length": 36000,
188
- "step": 7375
189
  }
190
  ],
191
  "logging_steps": 500,
192
- "max_steps": 7375,
193
- "num_train_epochs": 5,
194
  "save_steps": 500,
195
- "total_flos": 6.058048297107456e+16,
196
  "trial_name": null,
197
  "trial_params": null
198
  }
 
1
  {
2
+ "best_metric": NaN,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-144",
4
+ "epoch": 0.9956784788245462,
5
  "eval_steps": 500,
6
+ "global_step": 144,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.0,
13
  "eval_bleu": 1.0,
14
  "eval_brevity_penalty": 1.0,
15
  "eval_length_ratio": 1.0,
16
+ "eval_loss": NaN,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "eval_precisions": [
18
  1.0,
19
  1.0,
20
  1.0,
21
  1.0
22
  ],
23
+ "eval_reference_length": 18771,
24
+ "eval_runtime": 122.4923,
25
+ "eval_samples_per_second": 8.392,
26
+ "eval_steps_per_second": 1.053,
27
+ "eval_translation_length": 18771,
28
+ "step": 144
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 2880,
33
+ "num_train_epochs": 20,
34
  "save_steps": 500,
35
+ "total_flos": 6335375024848896.0,
36
  "trial_name": null,
37
  "trial_params": null
38
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb8f93fe1bfa9a74eac46b5edcc31c9e6cad75affbcfaa221d6576a2517e1b3e
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e101b52461267c553f7820ea490d90f0c52a8eee984bb696ad906e06ac49bd9a
3
  size 4664