Sabbir772 committed on
Commit
ccdf3f7
·
verified ·
1 Parent(s): c8806b5

Training in progress, step 4400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:609fc3aec7d4ea09743f402bcbdc9312381fb07cfdea7f760dbb9aa66c9922ae
3
  size 990185320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e52c190ed78a0ebb86d913c3edb39c59d1fb56fb02a685e065429d572224717
3
  size 990185320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f7fbb8e0818078c8e05f154119f20b96ef2520c1006cc15bca9aee9ce6c37a6
3
  size 1980545291
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:713ec180255281424e9221e7ce8537b4f82b9eec0533c8b1de86f4fe29e8f69b
3
  size 1980545291
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6db4e22e6e4d4c2dc1940f88f326ffecd6b0a23e25b1f1f067d9f0becb200e8
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85d47bc78c02f641fd9d0148cd322117520fa2159b2a37680fd3d6bfd1e4d8f0
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41bbb83c038b51603ecc1a132a2a64f6ce3985f0afe06c759ec4c137f1f6ea37
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac71e29fa8fcf946ca23ffb18fdc365d779c9119156f3d1b53233021eb38387
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.0,
6
  "eval_steps": 400,
7
- "global_step": 4209,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -402,12 +402,36 @@
402
  "learning_rate": 1.1879306248515088e-07,
403
  "loss": 0.6361,
404
  "step": 4200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  }
406
  ],
407
  "logging_steps": 100,
408
- "max_steps": 4209,
409
  "num_input_tokens_seen": 0,
410
- "num_train_epochs": 3,
411
  "save_steps": 400,
412
  "stateful_callbacks": {
413
  "TrainerControl": {
@@ -416,12 +440,12 @@
416
  "should_evaluate": false,
417
  "should_log": false,
418
  "should_save": true,
419
- "should_training_stop": true
420
  },
421
  "attributes": {}
422
  }
423
  },
424
- "total_flos": 1.152343386095616e+16,
425
  "train_batch_size": 8,
426
  "trial_name": null,
427
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.1361368496079827,
6
  "eval_steps": 400,
7
+ "global_step": 4400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
402
  "learning_rate": 1.1879306248515088e-07,
403
  "loss": 0.6361,
404
  "step": 4200
405
+ },
406
+ {
407
+ "epoch": 3.0648610121168924,
408
+ "grad_norm": 4.8953704833984375,
409
+ "learning_rate": 1.9358517462580187e-05,
410
+ "loss": 0.6203,
411
+ "step": 4300
412
+ },
413
+ {
414
+ "epoch": 3.1361368496079827,
415
+ "grad_norm": 4.9622979164123535,
416
+ "learning_rate": 1.864575908766928e-05,
417
+ "loss": 0.6883,
418
+ "step": 4400
419
+ },
420
+ {
421
+ "epoch": 3.1361368496079827,
422
+ "eval_bleu": 54.4615551796715,
423
+ "eval_chrf": 76.75255355595692,
424
+ "eval_loss": 1.152636170387268,
425
+ "eval_runtime": 45.8574,
426
+ "eval_samples_per_second": 9.246,
427
+ "eval_steps_per_second": 1.156,
428
+ "step": 4400
429
  }
430
  ],
431
  "logging_steps": 100,
432
+ "max_steps": 7015,
433
  "num_input_tokens_seen": 0,
434
+ "num_train_epochs": 5,
435
  "save_steps": 400,
436
  "stateful_callbacks": {
437
  "TrainerControl": {
 
440
  "should_evaluate": false,
441
  "should_log": false,
442
  "should_save": true,
443
+ "should_training_stop": false
444
  },
445
  "attributes": {}
446
  }
447
  },
448
+ "total_flos": 1.2046541613170688e+16,
449
  "train_batch_size": 8,
450
  "trial_name": null,
451
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a88ea9e1b21a65cd09311fdaf930fc0a1e92d081971bef8af8521a261729151c
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a97dc88cb36ad68d168c1fe8fdf25a699fd4c00ef193018770a2a9f07a5f869
3
  size 5905