avsolatorio committed on
Commit
f68a3a7
·
verified ·
1 Parent(s): 69a1dd2

Training in progress, step 18000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59ca63e739c086dc23275c2c5079539d3187e56bee6925af1cb0015a9d019bc3
3
  size 567860028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c363a1a59fc93c3321875d7f58013a2f10c3aab12f3af0076dff096e82ffb9c
3
  size 567860028
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04093ee821b9d615247867d2bac4f899594c0d9aa7b3314cf38273cd1397133c
3
  size 1135783354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f24ddbe795a6b5a789b92a1ca3088daece7124054a836e4ca83341f355e4ec
3
  size 1135783354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05fb8219401a347a541ffd89848ab026f78ecd34f13335c1313ca6b42f94154c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a2a6418efcb48342a1dc67d0b88b496c3ab628f281d571af1360b4d0af05a5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:865e288c245b395ede96990302d49b324d892d7a26b3b287023faac63c631eb0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:189a7468f8c95c103f6fc6f9a588f85a4d6a2cdd577f74ddb50948dbca31a23b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.036032263189554214,
3
- "best_model_checkpoint": "doc-topic-model_eval-00_train-01/checkpoint-16000",
4
- "epoch": 8.382642998027613,
5
  "eval_steps": 1000,
6
- "global_step": 17000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -449,6 +449,32 @@
449
  "eval_samples_per_second": 307.212,
450
  "eval_steps_per_second": 19.205,
451
  "step": 17000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  }
453
  ],
454
  "logging_steps": 500,
@@ -468,7 +494,7 @@
468
  "attributes": {}
469
  }
470
  },
471
- "total_flos": 384458650453980.0,
472
  "train_batch_size": 4,
473
  "trial_name": null,
474
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.035977210849523544,
3
+ "best_model_checkpoint": "doc-topic-model_eval-00_train-01/checkpoint-18000",
4
+ "epoch": 8.875739644970414,
5
  "eval_steps": 1000,
6
+ "global_step": 18000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
449
  "eval_samples_per_second": 307.212,
450
  "eval_steps_per_second": 19.205,
451
  "step": 17000
452
+ },
453
+ {
454
+ "epoch": 8.629191321499015,
455
+ "grad_norm": 0.4935995936393738,
456
+ "learning_rate": 2.7416173570019726e-06,
457
+ "loss": 0.0243,
458
+ "step": 17500
459
+ },
460
+ {
461
+ "epoch": 8.875739644970414,
462
+ "grad_norm": 0.5695288181304932,
463
+ "learning_rate": 2.2485207100591717e-06,
464
+ "loss": 0.0247,
465
+ "step": 18000
466
+ },
467
+ {
468
+ "epoch": 8.875739644970414,
469
+ "eval_accuracy": 0.9878693822016242,
470
+ "eval_f1": 0.6174748603351955,
471
+ "eval_loss": 0.035977210849523544,
472
+ "eval_precision": 0.7429554742955474,
473
+ "eval_recall": 0.5282557161428462,
474
+ "eval_runtime": 26.442,
475
+ "eval_samples_per_second": 306.709,
476
+ "eval_steps_per_second": 19.174,
477
+ "step": 18000
478
  }
479
  ],
480
  "logging_steps": 500,
 
494
  "attributes": {}
495
  }
496
  },
497
+ "total_flos": 407138463972036.0,
498
  "train_batch_size": 4,
499
  "trial_name": null,
500
  "trial_params": null