abdelhalim commited on
Commit
bedf785
·
1 Parent(s): f264eaf

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +206 -0
trainer_state.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.860524091293322,
5
+ "global_step": 11500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.21,
12
+ "learning_rate": 2.8732037193575658e-05,
13
+ "loss": 3.5507,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.42,
18
+ "learning_rate": 2.7464074387151312e-05,
19
+ "loss": 3.139,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.63,
24
+ "learning_rate": 2.6196111580726966e-05,
25
+ "loss": 3.0681,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.85,
30
+ "learning_rate": 2.492814877430262e-05,
31
+ "loss": 3.0193,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 1.0,
36
+ "eval_gen_len": 7.8559,
37
+ "eval_loss": 2.918958902359009,
38
+ "eval_rouge1": 48.6486,
39
+ "eval_rouge2": 0.0,
40
+ "eval_rougeL": 48.6486,
41
+ "eval_rougeLsum": 48.6486,
42
+ "eval_runtime": 1.4313,
43
+ "eval_samples_per_second": 77.553,
44
+ "eval_steps_per_second": 4.891,
45
+ "step": 2366
46
+ },
47
+ {
48
+ "epoch": 1.06,
49
+ "learning_rate": 2.3660185967878277e-05,
50
+ "loss": 2.9837,
51
+ "step": 2500
52
+ },
53
+ {
54
+ "epoch": 1.27,
55
+ "learning_rate": 2.239222316145393e-05,
56
+ "loss": 2.9458,
57
+ "step": 3000
58
+ },
59
+ {
60
+ "epoch": 1.48,
61
+ "learning_rate": 2.112426035502959e-05,
62
+ "loss": 2.9247,
63
+ "step": 3500
64
+ },
65
+ {
66
+ "epoch": 1.69,
67
+ "learning_rate": 1.985629754860524e-05,
68
+ "loss": 2.9004,
69
+ "step": 4000
70
+ },
71
+ {
72
+ "epoch": 1.9,
73
+ "learning_rate": 1.8588334742180896e-05,
74
+ "loss": 2.9038,
75
+ "step": 4500
76
+ },
77
+ {
78
+ "epoch": 2.0,
79
+ "eval_gen_len": 7.7387,
80
+ "eval_loss": 2.838879346847534,
81
+ "eval_rouge1": 48.6486,
82
+ "eval_rouge2": 0.0,
83
+ "eval_rougeL": 48.6486,
84
+ "eval_rougeLsum": 48.6486,
85
+ "eval_runtime": 1.069,
86
+ "eval_samples_per_second": 103.833,
87
+ "eval_steps_per_second": 6.548,
88
+ "step": 4732
89
+ },
90
+ {
91
+ "epoch": 2.11,
92
+ "learning_rate": 1.732037193575655e-05,
93
+ "loss": 2.8876,
94
+ "step": 5000
95
+ },
96
+ {
97
+ "epoch": 2.32,
98
+ "learning_rate": 1.6052409129332207e-05,
99
+ "loss": 2.8646,
100
+ "step": 5500
101
+ },
102
+ {
103
+ "epoch": 2.54,
104
+ "learning_rate": 1.4784446322907861e-05,
105
+ "loss": 2.8488,
106
+ "step": 6000
107
+ },
108
+ {
109
+ "epoch": 2.75,
110
+ "learning_rate": 1.3516483516483517e-05,
111
+ "loss": 2.8431,
112
+ "step": 6500
113
+ },
114
+ {
115
+ "epoch": 2.96,
116
+ "learning_rate": 1.224852071005917e-05,
117
+ "loss": 2.8369,
118
+ "step": 7000
119
+ },
120
+ {
121
+ "epoch": 3.0,
122
+ "eval_gen_len": 7.8468,
123
+ "eval_loss": 2.808067560195923,
124
+ "eval_rouge1": 48.6486,
125
+ "eval_rouge2": 0.0,
126
+ "eval_rougeL": 48.6486,
127
+ "eval_rougeLsum": 48.6486,
128
+ "eval_runtime": 1.0729,
129
+ "eval_samples_per_second": 103.457,
130
+ "eval_steps_per_second": 6.524,
131
+ "step": 7098
132
+ },
133
+ {
134
+ "epoch": 3.17,
135
+ "learning_rate": 1.0980557903634826e-05,
136
+ "loss": 2.8347,
137
+ "step": 7500
138
+ },
139
+ {
140
+ "epoch": 3.38,
141
+ "learning_rate": 9.712595097210482e-06,
142
+ "loss": 2.82,
143
+ "step": 8000
144
+ },
145
+ {
146
+ "epoch": 3.59,
147
+ "learning_rate": 8.444632290786136e-06,
148
+ "loss": 2.826,
149
+ "step": 8500
150
+ },
151
+ {
152
+ "epoch": 3.8,
153
+ "learning_rate": 7.1766694843617924e-06,
154
+ "loss": 2.7988,
155
+ "step": 9000
156
+ },
157
+ {
158
+ "epoch": 4.0,
159
+ "eval_gen_len": 8.027,
160
+ "eval_loss": 2.785550832748413,
161
+ "eval_rouge1": 48.6486,
162
+ "eval_rouge2": 0.0,
163
+ "eval_rougeL": 48.6486,
164
+ "eval_rougeLsum": 48.6486,
165
+ "eval_runtime": 1.0854,
166
+ "eval_samples_per_second": 102.268,
167
+ "eval_steps_per_second": 6.449,
168
+ "step": 9464
169
+ },
170
+ {
171
+ "epoch": 4.02,
172
+ "learning_rate": 5.908706677937447e-06,
173
+ "loss": 2.8132,
174
+ "step": 9500
175
+ },
176
+ {
177
+ "epoch": 4.23,
178
+ "learning_rate": 4.640743871513102e-06,
179
+ "loss": 2.7946,
180
+ "step": 10000
181
+ },
182
+ {
183
+ "epoch": 4.44,
184
+ "learning_rate": 3.3727810650887576e-06,
185
+ "loss": 2.8009,
186
+ "step": 10500
187
+ },
188
+ {
189
+ "epoch": 4.65,
190
+ "learning_rate": 2.1048182586644128e-06,
191
+ "loss": 2.7986,
192
+ "step": 11000
193
+ },
194
+ {
195
+ "epoch": 4.86,
196
+ "learning_rate": 8.368554522400676e-07,
197
+ "loss": 2.7982,
198
+ "step": 11500
199
+ }
200
+ ],
201
+ "max_steps": 11830,
202
+ "num_train_epochs": 5,
203
+ "total_flos": 5833380202217472.0,
204
+ "trial_name": null,
205
+ "trial_params": null
206
+ }