SystemAdmin123 committed on
Commit
dd53eaf
·
verified ·
1 Parent(s): c39b7be

Training in progress, step 180, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65bf4bf882aaede4a28a7946cfc9bdcf36b3c1e5ca2e0ff522619cff2f6a7dcc
3
  size 250490408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dd92eac0c175facf66bcc6744b550abe565351d58bfc65728501b0eb34deb6a
3
  size 250490408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea7a7662193bb7eec3ae38ae2840d7d8ba684cc08e3f58fa084ebd57157cdd62
3
  size 255265850
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5a2c0147463a169de4f6d49d24c6bab30ce74a9cdea031b1cbd9a46e14a1a3
3
  size 255265850
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe45c69e48a4c5c66de748d1ec7b6fa257eefbb2af87020ecbb0edb8fac5e065
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb8f173dc69af6518470d76de345e8f28d66983c49c7120eb4118b25379494c
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f08d1b5b2fb7f095d5f8456b35fce01dec787b65d26419e00acaecb339621b63
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef1dfb8026c7c70f38bd139303377768a0568a232a11e705dcdc6c898cedb8c3
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf268cb489a17b00a5da2e7f7b4a75dcc816743c1b54be7c48bf73c90d10ecf4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f9f0e9fcb3fec99611921d8f34980406ad06f308daea00535e4da9839a46d96
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.333333333333334,
5
  "eval_steps": 40,
6
- "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -159,6 +159,20 @@
159
  "eval_samples_per_second": 143.198,
160
  "eval_steps_per_second": 2.29,
161
  "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  }
163
  ],
164
  "logging_steps": 10,
@@ -178,7 +192,7 @@
178
  "attributes": {}
179
  }
180
  },
181
- "total_flos": 1.0372248443551744e+16,
182
  "train_batch_size": 32,
183
  "trial_name": null,
184
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
  "eval_steps": 40,
6
+ "global_step": 180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
159
  "eval_samples_per_second": 143.198,
160
  "eval_steps_per_second": 2.29,
161
  "step": 160
162
+ },
163
+ {
164
+ "epoch": 14.166666666666666,
165
+ "grad_norm": 4.5,
166
+ "learning_rate": 0.00017167825131684513,
167
+ "loss": 3.5521,
168
+ "step": 170
169
+ },
170
+ {
171
+ "epoch": 15.0,
172
+ "grad_norm": 4.90625,
173
+ "learning_rate": 0.00016772815716257412,
174
+ "loss": 3.4769,
175
+ "step": 180
176
  }
177
  ],
178
  "logging_steps": 10,
 
192
  "attributes": {}
193
  }
194
  },
195
+ "total_flos": 1.1658850390245376e+16,
196
  "train_batch_size": 32,
197
  "trial_name": null,
198
  "trial_params": null