whitedevil0089devil commited on
Commit
a2ba3aa
·
verified ·
1 Parent(s): 608680f

Upload checkpoint-750/trainer_state.json

Browse files
Files changed (1) hide show
  1. checkpoint-750/trainer_state.json +220 -0
checkpoint-750/trainer_state.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 150,
3
+ "best_metric": 0.9915354168771638,
4
+ "best_model_checkpoint": "/content/drive/MyDrive/model/Roberta_Squad/roberta_Squad/checkpoint-150",
5
+ "epoch": 0.5626406601650412,
6
+ "eval_steps": 150,
7
+ "global_step": 750,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0007501875468867217,
14
+ "grad_norm": 27.303098678588867,
15
+ "learning_rate": 0.0,
16
+ "loss": 1.1906,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.037509377344336084,
21
+ "grad_norm": 17.193105697631836,
22
+ "learning_rate": 8.997188378631678e-07,
23
+ "loss": 1.0245,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.07501875468867217,
28
+ "grad_norm": 0.9530491232872009,
29
+ "learning_rate": 1.8369259606373011e-06,
30
+ "loss": 0.2468,
31
+ "step": 100
32
+ },
33
+ {
34
+ "epoch": 0.11252813203300825,
35
+ "grad_norm": 0.649541974067688,
36
+ "learning_rate": 2.7741330834114345e-06,
37
+ "loss": 0.0702,
38
+ "step": 150
39
+ },
40
+ {
41
+ "epoch": 0.11252813203300825,
42
+ "eval_accuracy": 0.9943516121440339,
43
+ "eval_f1_macro": 0.24929195185272598,
44
+ "eval_f1_min": 0.0,
45
+ "eval_f1_std": 0.4317863265269357,
46
+ "eval_f1_weighted": 0.9915354168771638,
47
+ "eval_loss": 0.038926418870687485,
48
+ "eval_runtime": 35.4512,
49
+ "eval_samples_per_second": 119.855,
50
+ "eval_steps_per_second": 3.752,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.15003750937734434,
55
+ "grad_norm": 0.23891082406044006,
56
+ "learning_rate": 3.7113402061855674e-06,
57
+ "loss": 0.0472,
58
+ "step": 200
59
+ },
60
+ {
61
+ "epoch": 0.18754688672168043,
62
+ "grad_norm": 1.910960078239441,
63
+ "learning_rate": 4.6485473289597e-06,
64
+ "loss": 0.0478,
65
+ "step": 250
66
+ },
67
+ {
68
+ "epoch": 0.2250562640660165,
69
+ "grad_norm": 0.1880768984556198,
70
+ "learning_rate": 5.585754451733834e-06,
71
+ "loss": 0.0412,
72
+ "step": 300
73
+ },
74
+ {
75
+ "epoch": 0.2250562640660165,
76
+ "eval_accuracy": 0.9943516121440339,
77
+ "eval_f1_macro": 0.24929195185272598,
78
+ "eval_f1_min": 0.0,
79
+ "eval_f1_std": 0.4317863265269357,
80
+ "eval_f1_weighted": 0.9915354168771638,
81
+ "eval_loss": 0.040430303663015366,
82
+ "eval_runtime": 34.879,
83
+ "eval_samples_per_second": 121.821,
84
+ "eval_steps_per_second": 3.813,
85
+ "step": 300
86
+ },
87
+ {
88
+ "epoch": 0.2625656414103526,
89
+ "grad_norm": 0.09085190296173096,
90
+ "learning_rate": 6.522961574507966e-06,
91
+ "loss": 0.04,
92
+ "step": 350
93
+ },
94
+ {
95
+ "epoch": 0.30007501875468867,
96
+ "grad_norm": 0.05724379047751427,
97
+ "learning_rate": 7.4601686972821e-06,
98
+ "loss": 0.0287,
99
+ "step": 400
100
+ },
101
+ {
102
+ "epoch": 0.33758439609902474,
103
+ "grad_norm": 0.2768031358718872,
104
+ "learning_rate": 8.397375820056232e-06,
105
+ "loss": 0.0374,
106
+ "step": 450
107
+ },
108
+ {
109
+ "epoch": 0.33758439609902474,
110
+ "eval_accuracy": 0.9943516121440339,
111
+ "eval_f1_macro": 0.24929195185272598,
112
+ "eval_f1_min": 0.0,
113
+ "eval_f1_std": 0.4317863265269357,
114
+ "eval_f1_weighted": 0.9915354168771638,
115
+ "eval_loss": 0.033962853252887726,
116
+ "eval_runtime": 34.8673,
117
+ "eval_samples_per_second": 121.862,
118
+ "eval_steps_per_second": 3.814,
119
+ "step": 450
120
+ },
121
+ {
122
+ "epoch": 0.37509377344336087,
123
+ "grad_norm": 1.1712861061096191,
124
+ "learning_rate": 9.334582942830366e-06,
125
+ "loss": 0.0351,
126
+ "step": 500
127
+ },
128
+ {
129
+ "epoch": 0.41260315078769694,
130
+ "grad_norm": 0.24754193425178528,
131
+ "learning_rate": 1.0271790065604499e-05,
132
+ "loss": 0.042,
133
+ "step": 550
134
+ },
135
+ {
136
+ "epoch": 0.450112528132033,
137
+ "grad_norm": 1.4196994304656982,
138
+ "learning_rate": 1.1208997188378632e-05,
139
+ "loss": 0.0544,
140
+ "step": 600
141
+ },
142
+ {
143
+ "epoch": 0.450112528132033,
144
+ "eval_accuracy": 0.9943516121440339,
145
+ "eval_f1_macro": 0.24929195185272598,
146
+ "eval_f1_min": 0.0,
147
+ "eval_f1_std": 0.4317863265269357,
148
+ "eval_f1_weighted": 0.9915354168771638,
149
+ "eval_loss": 0.03701222687959671,
150
+ "eval_runtime": 35.0215,
151
+ "eval_samples_per_second": 121.325,
152
+ "eval_steps_per_second": 3.798,
153
+ "step": 600
154
+ },
155
+ {
156
+ "epoch": 0.4876219054763691,
157
+ "grad_norm": 0.06862561404705048,
158
+ "learning_rate": 1.2146204311152766e-05,
159
+ "loss": 0.0387,
160
+ "step": 650
161
+ },
162
+ {
163
+ "epoch": 0.5251312828207052,
164
+ "grad_norm": 0.3041529357433319,
165
+ "learning_rate": 1.3083411433926897e-05,
166
+ "loss": 0.0341,
167
+ "step": 700
168
+ },
169
+ {
170
+ "epoch": 0.5626406601650412,
171
+ "grad_norm": 0.06853009760379791,
172
+ "learning_rate": 1.402061855670103e-05,
173
+ "loss": 0.0408,
174
+ "step": 750
175
+ },
176
+ {
177
+ "epoch": 0.5626406601650412,
178
+ "eval_accuracy": 0.9943516121440339,
179
+ "eval_f1_macro": 0.24929195185272598,
180
+ "eval_f1_min": 0.0,
181
+ "eval_f1_std": 0.4317863265269357,
182
+ "eval_f1_weighted": 0.9915354168771638,
183
+ "eval_loss": 0.04015015810728073,
184
+ "eval_runtime": 35.0233,
185
+ "eval_samples_per_second": 121.319,
186
+ "eval_steps_per_second": 3.797,
187
+ "step": 750
188
+ }
189
+ ],
190
+ "logging_steps": 50,
191
+ "max_steps": 10664,
192
+ "num_input_tokens_seen": 0,
193
+ "num_train_epochs": 8,
194
+ "save_steps": 150,
195
+ "stateful_callbacks": {
196
+ "EarlyStoppingCallback": {
197
+ "args": {
198
+ "early_stopping_patience": 4,
199
+ "early_stopping_threshold": 0.0005
200
+ },
201
+ "attributes": {
202
+ "early_stopping_patience_counter": 4
203
+ }
204
+ },
205
+ "TrainerControl": {
206
+ "args": {
207
+ "should_epoch_stop": false,
208
+ "should_evaluate": false,
209
+ "should_log": false,
210
+ "should_save": true,
211
+ "should_training_stop": true
212
+ },
213
+ "attributes": {}
214
+ }
215
+ },
216
+ "total_flos": 1.2629670838272e+16,
217
+ "train_batch_size": 32,
218
+ "trial_name": null,
219
+ "trial_params": null
220
+ }