mgh6 commited on
Commit
e8825e6
·
verified ·
1 Parent(s): d024571

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d734e6db78f60fbbbccb113593aad2dacb6d4ef24adf3f2d2f765c04c3543ac
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c3635274eee21ba1976640f8142d5c1fbf66c82e539af94ae87a1f3ee9eb023
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e652abff809338fab18533d877e6e5d8666ad14769f8c73336a2290bb1d2f4
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36b31c0839e033c8fdb5f6e0bfc78e2d9735a0fe9264510a0e2f8d12a0d58d55
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec459b20953804686a31ef9dfbbe457e9e9185e9c16c0990f91f7b84833fdbbf
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ce1f6957d6020f16416f4fd345e9fae1e2983c0b203bfade0ebefc01a476151
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1809e210db6349fe0ab123cca2b958d44ebb1729354689aaf0244de4c395185
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d84e3c52aa4a82118b4d8d3c4b8e8c4226ad3a7764da780159cac8984f3a66d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.576659038901602,
5
  "eval_steps": 500,
6
- "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -140,6 +140,13 @@
140
  "learning_rate": 0.0008474263229743838,
141
  "loss": 1.0737,
142
  "step": 19000
 
 
 
 
 
 
 
143
  }
144
  ],
145
  "logging_steps": 1000,
@@ -159,7 +166,7 @@
159
  "attributes": {}
160
  }
161
  },
162
- "total_flos": 1.900695900913664e+17,
163
  "train_batch_size": 64,
164
  "trial_name": null,
165
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.817535830422739,
5
  "eval_steps": 500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
140
  "learning_rate": 0.0008474263229743838,
141
  "loss": 1.0737,
142
  "step": 19000
143
+ },
144
+ {
145
+ "epoch": 4.817535830422739,
146
+ "grad_norm": 0.5085521340370178,
147
+ "learning_rate": 0.0008393961294467197,
148
+ "loss": 1.068,
149
+ "step": 20000
150
  }
151
  ],
152
  "logging_steps": 1000,
 
166
  "attributes": {}
167
  }
168
  },
169
+ "total_flos": 2.0007346657191526e+17,
170
  "train_batch_size": 64,
171
  "trial_name": null,
172
  "trial_params": null