mgh6 commited on
Commit
83f864f
·
verified ·
1 Parent(s): 53f5382

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb7fdd07abfa119cf7164e53299c607359a9af6799573cd9441c500a5e24222
3
  size 2609498088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a386dbf8561c23ab581d453dcd91a538b003106a42f2c4c69aa89905c576ec94
3
  size 2609498088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8434daa3277e8b219eb59a6668bc14f8e7e4908bd17a2304d8b94d800e65d92
3
  size 5208796146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb642c71b31f5e976e9421191ae004cf5d66ac5cbda04966dfe08b3425e655f
3
  size 5208796146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8bb9aa3709cd1c036484251854b8be7d825e637d956e7203821b032aa2540e3
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3ed9e0dc19a3bb7b5e8c42e476b42194e2ef37d159e54556a02e2773c9c2ab
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:541d8b16c337fd96e05a077a2be76d2de8216d931de0afb127b2cab79028732a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0267fbad859e57b7ff33d6dbd4fbd9fdf3cbf25f82f07754a3b6e19cff3ef2f5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 50,
6
- "global_step": 387,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -112,6 +112,51 @@
112
  "eval_samples_per_second": 37.402,
113
  "eval_steps_per_second": 18.701,
114
  "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  }
116
  ],
117
  "logging_steps": 50,
@@ -131,7 +176,7 @@
131
  "attributes": {}
132
  }
133
  },
134
- "total_flos": 1.0698953774543667e+17,
135
  "train_batch_size": 2,
136
  "trial_name": null,
137
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 50,
6
+ "global_step": 516,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
112
  "eval_samples_per_second": 37.402,
113
  "eval_steps_per_second": 18.701,
114
  "step": 350
115
+ },
116
+ {
117
+ "epoch": 3.101512933138116,
118
+ "grad_norm": 0.6013683676719666,
119
+ "learning_rate": 6.875e-05,
120
+ "loss": 1.2507,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 3.101512933138116,
125
+ "eval_loss": 1.3298077583312988,
126
+ "eval_runtime": 11.5601,
127
+ "eval_samples_per_second": 37.37,
128
+ "eval_steps_per_second": 18.685,
129
+ "step": 400
130
+ },
131
+ {
132
+ "epoch": 3.49194729136164,
133
+ "grad_norm": 0.6187678575515747,
134
+ "learning_rate": 6.484375e-05,
135
+ "loss": 1.2463,
136
+ "step": 450
137
+ },
138
+ {
139
+ "epoch": 3.49194729136164,
140
+ "eval_loss": 1.2986701726913452,
141
+ "eval_runtime": 11.5395,
142
+ "eval_samples_per_second": 37.437,
143
+ "eval_steps_per_second": 18.718,
144
+ "step": 450
145
+ },
146
+ {
147
+ "epoch": 3.8823816495851635,
148
+ "grad_norm": 0.5973629951477051,
149
+ "learning_rate": 6.0937500000000004e-05,
150
+ "loss": 1.2315,
151
+ "step": 500
152
+ },
153
+ {
154
+ "epoch": 3.8823816495851635,
155
+ "eval_loss": 1.2973381280899048,
156
+ "eval_runtime": 11.5491,
157
+ "eval_samples_per_second": 37.405,
158
+ "eval_steps_per_second": 18.703,
159
+ "step": 500
160
  }
161
  ],
162
  "logging_steps": 50,
 
176
  "attributes": {}
177
  }
178
  },
179
+ "total_flos": 1.426527168149586e+17,
180
  "train_batch_size": 2,
181
  "trial_name": null,
182
  "trial_params": null