Training in progress, step 70000
Browse files- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61a48a0d3e06f6d9c4dbeec9dd5221abf842041a606411a3eb4c011b2b6ce157
|
| 3 |
+
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c91ea0abf3868a060d7ed4c0f943d81a8c31c59f1a4fbcac20e4c228f3d5306b
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc4e58968df8d8de7b76afb49e34b862e7070d202e80b8a826f72b6f635c2cde
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:594120123fbd1fad56e4287c2d069c836d6ed898e13f3bf2eb5ae1995a6abf18
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c2f001850d67d834ad6f5341284dffa53108ee6ecb8b9fb6837cfd65ff83d24
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e245827ca7b63a97e06cc9b0093c1d74b595472c8bc308d4290564e966646b17
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed21c5a14597e7b8ab428400d75245e02257e563d47635efe05355069d0b1928
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01cbc2bdac8629204109c016e1ffa5b5eef157a33d3511366f9e2f7031a7ee33
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b2cc8ec76a8c7fadc25b1ad07a0e21e6f5b2945d2efe03f93181052bc36951e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b7f27abef02152cc0e2907407c48c40e2c6425985f5175919eb4dcfb4c58016
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fe2d09bf8807c63a805e572e94358dda9d6462e44109f7e340ab10774501127
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -726,11 +726,131 @@
|
|
| 726 |
"learning_rate": 0.00014996172801386482,
|
| 727 |
"loss": 0.4117,
|
| 728 |
"step": 60000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
}
|
| 730 |
],
|
| 731 |
"max_steps": 1000000,
|
| 732 |
"num_train_epochs": 2,
|
| 733 |
-
"total_flos": 4.
|
| 734 |
"trial_name": null,
|
| 735 |
"trial_params": null
|
| 736 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.13993339170554817,
|
| 5 |
+
"global_step": 70000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 726 |
"learning_rate": 0.00014996172801386482,
|
| 727 |
"loss": 0.4117,
|
| 728 |
"step": 60000
|
| 729 |
+
},
|
| 730 |
+
{
|
| 731 |
+
"epoch": 0.12,
|
| 732 |
+
"learning_rate": 0.00014995780552943551,
|
| 733 |
+
"loss": 0.4106,
|
| 734 |
+
"step": 60500
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 0.12,
|
| 738 |
+
"learning_rate": 0.00014995369178303722,
|
| 739 |
+
"loss": 0.4098,
|
| 740 |
+
"step": 61000
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"epoch": 0.12,
|
| 744 |
+
"learning_rate": 0.0001499493867859168,
|
| 745 |
+
"loss": 0.4095,
|
| 746 |
+
"step": 61500
|
| 747 |
+
},
|
| 748 |
+
{
|
| 749 |
+
"epoch": 0.12,
|
| 750 |
+
"learning_rate": 0.0001499448905498439,
|
| 751 |
+
"loss": 0.4081,
|
| 752 |
+
"step": 62000
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 0.12,
|
| 756 |
+
"learning_rate": 0.00014994020308711106,
|
| 757 |
+
"loss": 0.408,
|
| 758 |
+
"step": 62500
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 0.13,
|
| 762 |
+
"learning_rate": 0.00014993532441053364,
|
| 763 |
+
"loss": 0.4074,
|
| 764 |
+
"step": 63000
|
| 765 |
+
},
|
| 766 |
+
{
|
| 767 |
+
"epoch": 0.13,
|
| 768 |
+
"learning_rate": 0.0001499302545334498,
|
| 769 |
+
"loss": 0.4066,
|
| 770 |
+
"step": 63500
|
| 771 |
+
},
|
| 772 |
+
{
|
| 773 |
+
"epoch": 0.13,
|
| 774 |
+
"learning_rate": 0.0001499249934697203,
|
| 775 |
+
"loss": 0.4055,
|
| 776 |
+
"step": 64000
|
| 777 |
+
},
|
| 778 |
+
{
|
| 779 |
+
"epoch": 0.13,
|
| 780 |
+
"learning_rate": 0.00014991954123372875,
|
| 781 |
+
"loss": 0.4049,
|
| 782 |
+
"step": 64500
|
| 783 |
+
},
|
| 784 |
+
{
|
| 785 |
+
"epoch": 0.13,
|
| 786 |
+
"learning_rate": 0.0001499138978403813,
|
| 787 |
+
"loss": 0.4038,
|
| 788 |
+
"step": 65000
|
| 789 |
+
},
|
| 790 |
+
{
|
| 791 |
+
"epoch": 0.13,
|
| 792 |
+
"learning_rate": 0.00014990806330510687,
|
| 793 |
+
"loss": 0.4039,
|
| 794 |
+
"step": 65500
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 0.13,
|
| 798 |
+
"learning_rate": 0.00014990203764385677,
|
| 799 |
+
"loss": 0.4029,
|
| 800 |
+
"step": 66000
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"epoch": 0.13,
|
| 804 |
+
"learning_rate": 0.00014989582087310494,
|
| 805 |
+
"loss": 0.4028,
|
| 806 |
+
"step": 66500
|
| 807 |
+
},
|
| 808 |
+
{
|
| 809 |
+
"epoch": 0.13,
|
| 810 |
+
"learning_rate": 0.00014988941300984784,
|
| 811 |
+
"loss": 0.4022,
|
| 812 |
+
"step": 67000
|
| 813 |
+
},
|
| 814 |
+
{
|
| 815 |
+
"epoch": 0.13,
|
| 816 |
+
"learning_rate": 0.00014988281407160426,
|
| 817 |
+
"loss": 0.4013,
|
| 818 |
+
"step": 67500
|
| 819 |
+
},
|
| 820 |
+
{
|
| 821 |
+
"epoch": 0.14,
|
| 822 |
+
"learning_rate": 0.0001498760240764155,
|
| 823 |
+
"loss": 0.4003,
|
| 824 |
+
"step": 68000
|
| 825 |
+
},
|
| 826 |
+
{
|
| 827 |
+
"epoch": 0.14,
|
| 828 |
+
"learning_rate": 0.00014986904304284512,
|
| 829 |
+
"loss": 0.3996,
|
| 830 |
+
"step": 68500
|
| 831 |
+
},
|
| 832 |
+
{
|
| 833 |
+
"epoch": 0.14,
|
| 834 |
+
"learning_rate": 0.000149861870989979,
|
| 835 |
+
"loss": 0.3994,
|
| 836 |
+
"step": 69000
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 0.14,
|
| 840 |
+
"learning_rate": 0.00014985450793742527,
|
| 841 |
+
"loss": 0.399,
|
| 842 |
+
"step": 69500
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"epoch": 0.14,
|
| 846 |
+
"learning_rate": 0.0001498469539053142,
|
| 847 |
+
"loss": 0.3993,
|
| 848 |
+
"step": 70000
|
| 849 |
}
|
| 850 |
],
|
| 851 |
"max_steps": 1000000,
|
| 852 |
"num_train_epochs": 2,
|
| 853 |
+
"total_flos": 4.73259935611438e+21,
|
| 854 |
"trial_name": null,
|
| 855 |
"trial_params": null
|
| 856 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c91ea0abf3868a060d7ed4c0f943d81a8c31c59f1a4fbcac20e4c228f3d5306b
|
| 3 |
size 449450757
|