k4black commited on
Commit
e193b61
·
1 Parent(s): dfc25db

Training in progress, step 8800

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56290d577bb399219d12b202be37ca101e6a6f0a9668d1d0cfa2d9aad00953f3
3
  size 997349515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c7978bf8054801da6f4c756ac3ee82d92c0f486af86b40dc0bf28323cf03296
3
  size 997349515
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd83f389d0c97375b7f818c8e82d986e4ae56a618b8b2c299c81b885ff7a1e3f
3
  size 498663405
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f88d4ac00fa7feb29c8dd174036ce80b27f550f2e7dcaa40ee80769e3c83a2
3
  size 498663405
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d35e892693875b95b0138807496870cd0cc89c55b72b1b6dd1430d39771a0ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc559e8f3be7931753acd7e14135ef3c355c45429768068a18a2cdbd924ae84e
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab32762904ae4476cadd93f071777e19190a68ba8e7e9fe09145ebc857602b7
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ac5f79294afe5b58702f8cfd595843c97cf8ecf3eb18e0b1f699ed162ad0894
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5835770cef613970a169521bc333c3ce82fb4729d6ec5ebe78749ca3671853b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac05aa2ac2c93c43b69f88d2a334721a2027f6a3102b80f21e4e6745984e25d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.907479569921263,
3
- "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-base-e-snli-classification-nli-base/checkpoint-7200",
4
- "epoch": 0.8387698042870456,
5
- "global_step": 7200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -294,11 +294,75 @@
294
  "eval_samples_per_second": 2361.347,
295
  "eval_steps_per_second": 36.949,
296
  "step": 7200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  }
298
  ],
299
  "max_steps": 25752,
300
  "num_train_epochs": 3,
301
- "total_flos": 1.2570506645372928e+16,
302
  "trial_name": null,
303
  "trial_params": null
304
  }
 
1
  {
2
+ "best_metric": 0.9120778013038088,
3
+ "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-base-e-snli-classification-nli-base/checkpoint-8000",
4
+ "epoch": 1.0251630941286114,
5
+ "global_step": 8800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
294
  "eval_samples_per_second": 2361.347,
295
  "eval_steps_per_second": 36.949,
296
  "step": 7200
297
+ },
298
+ {
299
+ "epoch": 0.89,
300
+ "learning_rate": 7.421517331589274e-06,
301
+ "loss": 0.3309,
302
+ "step": 7600
303
+ },
304
+ {
305
+ "epoch": 0.89,
306
+ "eval_accuracy": 0.9061166429587483,
307
+ "eval_f1": 0.9058061283874342,
308
+ "eval_loss": 0.2577354609966278,
309
+ "eval_runtime": 4.4559,
310
+ "eval_samples_per_second": 2208.773,
311
+ "eval_steps_per_second": 34.561,
312
+ "step": 7600
313
+ },
314
+ {
315
+ "epoch": 0.93,
316
+ "learning_rate": 7.258011772400262e-06,
317
+ "loss": 0.3236,
318
+ "step": 8000
319
+ },
320
+ {
321
+ "epoch": 0.93,
322
+ "eval_accuracy": 0.9121113594797805,
323
+ "eval_f1": 0.9120778013038088,
324
+ "eval_loss": 0.25613412261009216,
325
+ "eval_runtime": 4.1922,
326
+ "eval_samples_per_second": 2347.706,
327
+ "eval_steps_per_second": 36.735,
328
+ "step": 8000
329
+ },
330
+ {
331
+ "epoch": 0.98,
332
+ "learning_rate": 7.09450621321125e-06,
333
+ "loss": 0.3183,
334
+ "step": 8400
335
+ },
336
+ {
337
+ "epoch": 0.98,
338
+ "eval_accuracy": 0.908758382442593,
339
+ "eval_f1": 0.9083914158392551,
340
+ "eval_loss": 0.2555808424949646,
341
+ "eval_runtime": 4.2479,
342
+ "eval_samples_per_second": 2316.917,
343
+ "eval_steps_per_second": 36.253,
344
+ "step": 8400
345
+ },
346
+ {
347
+ "epoch": 1.03,
348
+ "learning_rate": 6.931000654022237e-06,
349
+ "loss": 0.3022,
350
+ "step": 8800
351
+ },
352
+ {
353
+ "epoch": 1.03,
354
+ "eval_accuracy": 0.906421459053038,
355
+ "eval_f1": 0.9056135727779063,
356
+ "eval_loss": 0.2667880356311798,
357
+ "eval_runtime": 4.4383,
358
+ "eval_samples_per_second": 2217.519,
359
+ "eval_steps_per_second": 34.698,
360
+ "step": 8800
361
  }
362
  ],
363
  "max_steps": 25752,
364
  "num_train_epochs": 3,
365
+ "total_flos": 1.536997071500478e+16,
366
  "trial_name": null,
367
  "trial_params": null
368
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd83f389d0c97375b7f818c8e82d986e4ae56a618b8b2c299c81b885ff7a1e3f
3
  size 498663405
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f88d4ac00fa7feb29c8dd174036ce80b27f550f2e7dcaa40ee80769e3c83a2
3
  size 498663405