Nav772 committed on
Commit
97d6bff
·
verified ·
1 Parent(s): 0bade36

Upload training_logs.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_logs.json +226 -0
training_logs.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 2.3629,
4
+ "grad_norm": 6.658691883087158,
5
+ "learning_rate": 1.2457627118644069e-05,
6
+ "epoch": 0.2127659574468085,
7
+ "step": 50
8
+ },
9
+ {
10
+ "loss": 1.4786,
11
+ "grad_norm": 4.87250280380249,
12
+ "learning_rate": 2.5169491525423728e-05,
13
+ "epoch": 0.425531914893617,
14
+ "step": 100
15
+ },
16
+ {
17
+ "loss": 0.515,
18
+ "grad_norm": 3.0046627521514893,
19
+ "learning_rate": 2.9120151371807e-05,
20
+ "epoch": 0.6382978723404256,
21
+ "step": 150
22
+ },
23
+ {
24
+ "loss": 0.3254,
25
+ "grad_norm": 2.944659948348999,
26
+ "learning_rate": 2.770104068117313e-05,
27
+ "epoch": 0.851063829787234,
28
+ "step": 200
29
+ },
30
+ {
31
+ "eval_loss": 0.10763410478830338,
32
+ "eval_accuracy": 0.972,
33
+ "eval_runtime": 98.2742,
34
+ "eval_samples_per_second": 25.439,
35
+ "eval_steps_per_second": 0.804,
36
+ "epoch": 1.0,
37
+ "step": 235
38
+ },
39
+ {
40
+ "loss": 0.2256,
41
+ "grad_norm": 4.162021160125732,
42
+ "learning_rate": 2.628192999053926e-05,
43
+ "epoch": 1.0638297872340425,
44
+ "step": 250
45
+ },
46
+ {
47
+ "loss": 0.1403,
48
+ "grad_norm": 1.9201328754425049,
49
+ "learning_rate": 2.4862819299905392e-05,
50
+ "epoch": 1.2765957446808511,
51
+ "step": 300
52
+ },
53
+ {
54
+ "loss": 0.1532,
55
+ "grad_norm": 2.2434935569763184,
56
+ "learning_rate": 2.3443708609271523e-05,
57
+ "epoch": 1.4893617021276595,
58
+ "step": 350
59
+ },
60
+ {
61
+ "loss": 0.1162,
62
+ "grad_norm": 2.351989984512329,
63
+ "learning_rate": 2.2024597918637654e-05,
64
+ "epoch": 1.702127659574468,
65
+ "step": 400
66
+ },
67
+ {
68
+ "loss": 0.1216,
69
+ "grad_norm": 2.133737325668335,
70
+ "learning_rate": 2.0605487228003786e-05,
71
+ "epoch": 1.9148936170212765,
72
+ "step": 450
73
+ },
74
+ {
75
+ "eval_loss": 0.09042185544967651,
76
+ "eval_accuracy": 0.9768,
77
+ "eval_runtime": 97.4381,
78
+ "eval_samples_per_second": 25.657,
79
+ "eval_steps_per_second": 0.811,
80
+ "epoch": 2.0,
81
+ "step": 470
82
+ },
83
+ {
84
+ "loss": 0.0778,
85
+ "grad_norm": 0.8670908808708191,
86
+ "learning_rate": 1.9186376537369917e-05,
87
+ "epoch": 2.127659574468085,
88
+ "step": 500
89
+ },
90
+ {
91
+ "loss": 0.0433,
92
+ "grad_norm": 0.14258132874965668,
93
+ "learning_rate": 1.7767265846736048e-05,
94
+ "epoch": 2.3404255319148937,
95
+ "step": 550
96
+ },
97
+ {
98
+ "loss": 0.0369,
99
+ "grad_norm": 3.188659191131592,
100
+ "learning_rate": 1.634815515610218e-05,
101
+ "epoch": 2.5531914893617023,
102
+ "step": 600
103
+ },
104
+ {
105
+ "loss": 0.0305,
106
+ "grad_norm": 2.0361948013305664,
107
+ "learning_rate": 1.4929044465468307e-05,
108
+ "epoch": 2.7659574468085104,
109
+ "step": 650
110
+ },
111
+ {
112
+ "loss": 0.0361,
113
+ "grad_norm": 1.4521644115447998,
114
+ "learning_rate": 1.3509933774834438e-05,
115
+ "epoch": 2.978723404255319,
116
+ "step": 700
117
+ },
118
+ {
119
+ "eval_loss": 0.07695046812295914,
120
+ "eval_accuracy": 0.9788,
121
+ "eval_runtime": 98.1763,
122
+ "eval_samples_per_second": 25.464,
123
+ "eval_steps_per_second": 0.805,
124
+ "epoch": 3.0,
125
+ "step": 705
126
+ },
127
+ {
128
+ "loss": 0.0192,
129
+ "grad_norm": 0.425351083278656,
130
+ "learning_rate": 1.2090823084200568e-05,
131
+ "epoch": 3.1914893617021276,
132
+ "step": 750
133
+ },
134
+ {
135
+ "loss": 0.013,
136
+ "grad_norm": 0.42595893144607544,
137
+ "learning_rate": 1.0671712393566697e-05,
138
+ "epoch": 3.404255319148936,
139
+ "step": 800
140
+ },
141
+ {
142
+ "loss": 0.0092,
143
+ "grad_norm": 0.24749380350112915,
144
+ "learning_rate": 9.252601702932829e-06,
145
+ "epoch": 3.617021276595745,
146
+ "step": 850
147
+ },
148
+ {
149
+ "loss": 0.0118,
150
+ "grad_norm": 0.6825519800186157,
151
+ "learning_rate": 7.83349101229896e-06,
152
+ "epoch": 3.829787234042553,
153
+ "step": 900
154
+ },
155
+ {
156
+ "eval_loss": 0.07638780027627945,
157
+ "eval_accuracy": 0.98,
158
+ "eval_runtime": 97.9771,
159
+ "eval_samples_per_second": 25.516,
160
+ "eval_steps_per_second": 0.806,
161
+ "epoch": 4.0,
162
+ "step": 940
163
+ },
164
+ {
165
+ "loss": 0.0061,
166
+ "grad_norm": 0.02345215529203415,
167
+ "learning_rate": 6.41438032166509e-06,
168
+ "epoch": 4.042553191489362,
169
+ "step": 950
170
+ },
171
+ {
172
+ "loss": 0.0045,
173
+ "grad_norm": 0.12952572107315063,
174
+ "learning_rate": 4.995269631031221e-06,
175
+ "epoch": 4.25531914893617,
176
+ "step": 1000
177
+ },
178
+ {
179
+ "loss": 0.0048,
180
+ "grad_norm": 0.040700629353523254,
181
+ "learning_rate": 3.576158940397351e-06,
182
+ "epoch": 4.468085106382979,
183
+ "step": 1050
184
+ },
185
+ {
186
+ "loss": 0.0042,
187
+ "grad_norm": 0.03013400174677372,
188
+ "learning_rate": 2.1570482497634815e-06,
189
+ "epoch": 4.680851063829787,
190
+ "step": 1100
191
+ },
192
+ {
193
+ "loss": 0.0084,
194
+ "grad_norm": 0.03551739081740379,
195
+ "learning_rate": 7.379375591296122e-07,
196
+ "epoch": 4.8936170212765955,
197
+ "step": 1150
198
+ },
199
+ {
200
+ "eval_loss": 0.07666528224945068,
201
+ "eval_accuracy": 0.9804,
202
+ "eval_runtime": 96.8698,
203
+ "eval_samples_per_second": 25.808,
204
+ "eval_steps_per_second": 0.816,
205
+ "epoch": 5.0,
206
+ "step": 1175
207
+ },
208
+ {
209
+ "train_runtime": 2705.0877,
210
+ "train_samples_per_second": 13.863,
211
+ "train_steps_per_second": 0.434,
212
+ "total_flos": 2.9061579714048e+18,
213
+ "train_loss": 0.24456460309789535,
214
+ "epoch": 5.0,
215
+ "step": 1175
216
+ },
217
+ {
218
+ "eval_loss": 0.07666528224945068,
219
+ "eval_accuracy": 0.9804,
220
+ "eval_runtime": 95.6512,
221
+ "eval_samples_per_second": 26.137,
222
+ "eval_steps_per_second": 0.826,
223
+ "epoch": 5.0,
224
+ "step": 1175
225
+ }
226
+ ]