shorecode committed on
Commit
32a84ce
·
verified ·
1 Parent(s): db25ff1

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fd29ac343b2e0582fb8a887937c055cdcc418a58a57805fd2e8f97c4a9b5902
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca78270c1eb66602a5b2542766d42a3fc8e325de3b96fce543a19e8e8bef7ca7
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f959a9f93fe8c29de8bad80e922cd938a2f1ded943f2da2478b6fa5c4adfd70
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8e440d79ef49bcf712a1d5de898513d16e87c6d8ce5161401d087749466aa41
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:019869f04ad9e65816198cf9bbc88e01065d43c5ae26d3c4dfa75569351f838e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5484747db8664edead11a8224a290c2af74f43eba04cf19c64277c5316deb36e
3
  size 14645
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a377768ec214c80211f84cf49de0b3a1987fbbc2efbe16a70dc0f558f050f0a7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1017972ceaff444cd6ce76f4cbc6776ac15954ae5dab86e347a964413d5e9fd6
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.055780397677283,
6
  "eval_steps": 500,
7
- "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -440,6 +440,78 @@
440
  "eval_samples_per_second": 108.993,
441
  "eval_steps_per_second": 4.194,
442
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
  }
444
  ],
445
  "logging_steps": 125,
@@ -459,7 +531,7 @@
459
  "attributes": {}
460
  }
461
  },
462
- "total_flos": 3520096723009536.0,
463
  "train_batch_size": 26,
464
  "trial_name": null,
465
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.2317437972901637,
6
  "eval_steps": 500,
7
+ "global_step": 7000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
440
  "eval_samples_per_second": 108.993,
441
  "eval_steps_per_second": 4.194,
442
  "step": 6000
443
+ },
444
+ {
445
+ "epoch": 1.0777758226288932,
446
+ "grad_norm": 0.8108923435211182,
447
+ "learning_rate": 0.00019224001407707196,
448
+ "loss": 3.1627,
449
+ "step": 6125
450
+ },
451
+ {
452
+ "epoch": 1.0997712475805033,
453
+ "grad_norm": 0.8860889077186584,
454
+ "learning_rate": 0.00019004047158191096,
455
+ "loss": 3.1703,
456
+ "step": 6250
457
+ },
458
+ {
459
+ "epoch": 1.1217666725321134,
460
+ "grad_norm": 0.6584394574165344,
461
+ "learning_rate": 0.00018784092908674993,
462
+ "loss": 3.1704,
463
+ "step": 6375
464
+ },
465
+ {
466
+ "epoch": 1.1437620974837235,
467
+ "grad_norm": 0.7717780470848083,
468
+ "learning_rate": 0.00018564138659158893,
469
+ "loss": 3.1712,
470
+ "step": 6500
471
+ },
472
+ {
473
+ "epoch": 1.1437620974837235,
474
+ "eval_loss": 2.6807186603546143,
475
+ "eval_runtime": 90.3407,
476
+ "eval_samples_per_second": 109.032,
477
+ "eval_steps_per_second": 4.195,
478
+ "step": 6500
479
+ },
480
+ {
481
+ "epoch": 1.1657575224353334,
482
+ "grad_norm": 0.666558563709259,
483
+ "learning_rate": 0.00018344184409642793,
484
+ "loss": 3.1564,
485
+ "step": 6625
486
+ },
487
+ {
488
+ "epoch": 1.1877529473869435,
489
+ "grad_norm": 0.8080345392227173,
490
+ "learning_rate": 0.0001812423016012669,
491
+ "loss": 3.1725,
492
+ "step": 6750
493
+ },
494
+ {
495
+ "epoch": 1.2097483723385536,
496
+ "grad_norm": 0.7759010791778564,
497
+ "learning_rate": 0.0001790427591061059,
498
+ "loss": 3.1469,
499
+ "step": 6875
500
+ },
501
+ {
502
+ "epoch": 1.2317437972901637,
503
+ "grad_norm": 0.7258739471435547,
504
+ "learning_rate": 0.00017684321661094488,
505
+ "loss": 3.147,
506
+ "step": 7000
507
+ },
508
+ {
509
+ "epoch": 1.2317437972901637,
510
+ "eval_loss": 2.674105405807495,
511
+ "eval_runtime": 90.5221,
512
+ "eval_samples_per_second": 108.813,
513
+ "eval_steps_per_second": 4.187,
514
+ "step": 7000
515
  }
516
  ],
517
  "logging_steps": 125,
 
531
  "attributes": {}
532
  }
533
  },
534
+ "total_flos": 4106809598017536.0,
535
  "train_batch_size": 26,
536
  "trial_name": null,
537
  "trial_params": null