Training in progress, step 61000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 301235464
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff3677e2a6c68c6a9bc84018c91a9abb1bcf7c14c1b566d1f4d545783476a72
|
| 3 |
size 301235464
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 602335994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95defb77fd9d966f9fb370451c779ea88fb6409a7bea604ae57a6a4ab86f381e
|
| 3 |
size 602335994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8ee7735caca4437694ef1fa1c7821cadab81eb5dba9c8318224d8baee7f9384
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80725391fd9590c70c1e5ba84487c80bcb26eb7012140d59e753f7bdbcc81863
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -10688,11 +10688,189 @@
|
|
| 10688 |
"eval_steps_per_second": 23.322,
|
| 10689 |
"num_input_tokens_seen": 15728640000,
|
| 10690 |
"step": 60000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10691 |
}
|
| 10692 |
],
|
| 10693 |
"logging_steps": 50,
|
| 10694 |
-
"max_steps":
|
| 10695 |
-
"num_input_tokens_seen":
|
| 10696 |
"num_train_epochs": 1,
|
| 10697 |
"save_steps": 1000,
|
| 10698 |
"stateful_callbacks": {
|
|
@@ -10702,12 +10880,12 @@
|
|
| 10702 |
"should_evaluate": false,
|
| 10703 |
"should_log": false,
|
| 10704 |
"should_save": true,
|
| 10705 |
-
"should_training_stop":
|
| 10706 |
},
|
| 10707 |
"attributes": {}
|
| 10708 |
}
|
| 10709 |
},
|
| 10710 |
-
"total_flos": 4.
|
| 10711 |
"train_batch_size": 64,
|
| 10712 |
"trial_name": null,
|
| 10713 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.4103164313232873,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 61000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 10688 |
"eval_steps_per_second": 23.322,
|
| 10689 |
"num_input_tokens_seen": 15728640000,
|
| 10690 |
"step": 60000
|
| 10691 |
+
},
|
| 10692 |
+
{
|
| 10693 |
+
"epoch": 0.40392625739284266,
|
| 10694 |
+
"grad_norm": 0.2130047082901001,
|
| 10695 |
+
"learning_rate": 0.0006867974850262581,
|
| 10696 |
+
"loss": 3.0074,
|
| 10697 |
+
"num_input_tokens_seen": 15741747200,
|
| 10698 |
+
"step": 60050
|
| 10699 |
+
},
|
| 10700 |
+
{
|
| 10701 |
+
"epoch": 0.40426258233655027,
|
| 10702 |
+
"grad_norm": 0.18596570193767548,
|
| 10703 |
+
"learning_rate": 0.000682235249939575,
|
| 10704 |
+
"loss": 2.9981,
|
| 10705 |
+
"num_input_tokens_seen": 15754854400,
|
| 10706 |
+
"step": 60100
|
| 10707 |
+
},
|
| 10708 |
+
{
|
| 10709 |
+
"epoch": 0.4045989072802579,
|
| 10710 |
+
"grad_norm": 0.2774942219257355,
|
| 10711 |
+
"learning_rate": 0.0006776554506402081,
|
| 10712 |
+
"loss": 3.0024,
|
| 10713 |
+
"num_input_tokens_seen": 15767961600,
|
| 10714 |
+
"step": 60150
|
| 10715 |
+
},
|
| 10716 |
+
{
|
| 10717 |
+
"epoch": 0.4049352322239655,
|
| 10718 |
+
"grad_norm": 0.19329522550106049,
|
| 10719 |
+
"learning_rate": 0.0006730585285387465,
|
| 10720 |
+
"loss": 3.0101,
|
| 10721 |
+
"num_input_tokens_seen": 15781068800,
|
| 10722 |
+
"step": 60200
|
| 10723 |
+
},
|
| 10724 |
+
{
|
| 10725 |
+
"epoch": 0.4052715571676731,
|
| 10726 |
+
"grad_norm": 0.21384254097938538,
|
| 10727 |
+
"learning_rate": 0.0006684449266961101,
|
| 10728 |
+
"loss": 3.0095,
|
| 10729 |
+
"num_input_tokens_seen": 15794176000,
|
| 10730 |
+
"step": 60250
|
| 10731 |
+
},
|
| 10732 |
+
{
|
| 10733 |
+
"epoch": 0.4056078821113807,
|
| 10734 |
+
"grad_norm": 0.3892166018486023,
|
| 10735 |
+
"learning_rate": 0.0006638150897808468,
|
| 10736 |
+
"loss": 3.0101,
|
| 10737 |
+
"num_input_tokens_seen": 15807283200,
|
| 10738 |
+
"step": 60300
|
| 10739 |
+
},
|
| 10740 |
+
{
|
| 10741 |
+
"epoch": 0.4059442070550883,
|
| 10742 |
+
"grad_norm": 0.27356287837028503,
|
| 10743 |
+
"learning_rate": 0.0006591694640262749,
|
| 10744 |
+
"loss": 3.0322,
|
| 10745 |
+
"num_input_tokens_seen": 15820390400,
|
| 10746 |
+
"step": 60350
|
| 10747 |
+
},
|
| 10748 |
+
{
|
| 10749 |
+
"epoch": 0.40628053199879594,
|
| 10750 |
+
"grad_norm": 0.20498153567314148,
|
| 10751 |
+
"learning_rate": 0.0006545084971874737,
|
| 10752 |
+
"loss": 3.0064,
|
| 10753 |
+
"num_input_tokens_seen": 15833497600,
|
| 10754 |
+
"step": 60400
|
| 10755 |
+
},
|
| 10756 |
+
{
|
| 10757 |
+
"epoch": 0.40661685694250355,
|
| 10758 |
+
"grad_norm": 0.19939659535884857,
|
| 10759 |
+
"learning_rate": 0.0006498326384981283,
|
| 10760 |
+
"loss": 3.0158,
|
| 10761 |
+
"num_input_tokens_seen": 15846604800,
|
| 10762 |
+
"step": 60450
|
| 10763 |
+
},
|
| 10764 |
+
{
|
| 10765 |
+
"epoch": 0.40695318188621116,
|
| 10766 |
+
"grad_norm": 0.24545226991176605,
|
| 10767 |
+
"learning_rate": 0.0006451423386272311,
|
| 10768 |
+
"loss": 3.0132,
|
| 10769 |
+
"num_input_tokens_seen": 15859712000,
|
| 10770 |
+
"step": 60500
|
| 10771 |
+
},
|
| 10772 |
+
{
|
| 10773 |
+
"epoch": 0.40695318188621116,
|
| 10774 |
+
"eval_loss": 2.914865255355835,
|
| 10775 |
+
"eval_runtime": 51.2039,
|
| 10776 |
+
"eval_samples_per_second": 97.649,
|
| 10777 |
+
"eval_steps_per_second": 24.412,
|
| 10778 |
+
"num_input_tokens_seen": 15859712000,
|
| 10779 |
+
"step": 60500
|
| 10780 |
+
},
|
| 10781 |
+
{
|
| 10782 |
+
"epoch": 0.40728950682991877,
|
| 10783 |
+
"grad_norm": 0.2364359349012375,
|
| 10784 |
+
"learning_rate": 0.0006404380496356461,
|
| 10785 |
+
"loss": 3.0102,
|
| 10786 |
+
"num_input_tokens_seen": 15872819200,
|
| 10787 |
+
"step": 60550
|
| 10788 |
+
},
|
| 10789 |
+
{
|
| 10790 |
+
"epoch": 0.4076258317736264,
|
| 10791 |
+
"grad_norm": 0.19283762574195862,
|
| 10792 |
+
"learning_rate": 0.0006357202249325371,
|
| 10793 |
+
"loss": 3.0132,
|
| 10794 |
+
"num_input_tokens_seen": 15885926400,
|
| 10795 |
+
"step": 60600
|
| 10796 |
+
},
|
| 10797 |
+
{
|
| 10798 |
+
"epoch": 0.40796215671733405,
|
| 10799 |
+
"grad_norm": 0.19770501554012299,
|
| 10800 |
+
"learning_rate": 0.0006309893192316686,
|
| 10801 |
+
"loss": 3.0106,
|
| 10802 |
+
"num_input_tokens_seen": 15899033600,
|
| 10803 |
+
"step": 60650
|
| 10804 |
+
},
|
| 10805 |
+
{
|
| 10806 |
+
"epoch": 0.40829848166104166,
|
| 10807 |
+
"grad_norm": 0.18395134806632996,
|
| 10808 |
+
"learning_rate": 0.000626245788507579,
|
| 10809 |
+
"loss": 3.005,
|
| 10810 |
+
"num_input_tokens_seen": 15912140800,
|
| 10811 |
+
"step": 60700
|
| 10812 |
+
},
|
| 10813 |
+
{
|
| 10814 |
+
"epoch": 0.40863480660474927,
|
| 10815 |
+
"grad_norm": 0.21380823850631714,
|
| 10816 |
+
"learning_rate": 0.000621490089951632,
|
| 10817 |
+
"loss": 3.0106,
|
| 10818 |
+
"num_input_tokens_seen": 15925248000,
|
| 10819 |
+
"step": 60750
|
| 10820 |
+
},
|
| 10821 |
+
{
|
| 10822 |
+
"epoch": 0.4089711315484569,
|
| 10823 |
+
"grad_norm": 0.17995478212833405,
|
| 10824 |
+
"learning_rate": 0.0006167226819279528,
|
| 10825 |
+
"loss": 3.0237,
|
| 10826 |
+
"num_input_tokens_seen": 15938355200,
|
| 10827 |
+
"step": 60800
|
| 10828 |
+
},
|
| 10829 |
+
{
|
| 10830 |
+
"epoch": 0.4093074564921645,
|
| 10831 |
+
"grad_norm": 0.31993716955184937,
|
| 10832 |
+
"learning_rate": 0.0006119440239292493,
|
| 10833 |
+
"loss": 3.0158,
|
| 10834 |
+
"num_input_tokens_seen": 15951462400,
|
| 10835 |
+
"step": 60850
|
| 10836 |
+
},
|
| 10837 |
+
{
|
| 10838 |
+
"epoch": 0.4096437814358721,
|
| 10839 |
+
"grad_norm": 0.19210565090179443,
|
| 10840 |
+
"learning_rate": 0.0006071545765325253,
|
| 10841 |
+
"loss": 3.0121,
|
| 10842 |
+
"num_input_tokens_seen": 15964569600,
|
| 10843 |
+
"step": 60900
|
| 10844 |
+
},
|
| 10845 |
+
{
|
| 10846 |
+
"epoch": 0.4099801063795797,
|
| 10847 |
+
"grad_norm": 0.4126472771167755,
|
| 10848 |
+
"learning_rate": 0.0006023548013546899,
|
| 10849 |
+
"loss": 3.0215,
|
| 10850 |
+
"num_input_tokens_seen": 15977676800,
|
| 10851 |
+
"step": 60950
|
| 10852 |
+
},
|
| 10853 |
+
{
|
| 10854 |
+
"epoch": 0.4103164313232873,
|
| 10855 |
+
"grad_norm": 0.26418012380599976,
|
| 10856 |
+
"learning_rate": 0.0005975451610080642,
|
| 10857 |
+
"loss": 3.0125,
|
| 10858 |
+
"num_input_tokens_seen": 15990784000,
|
| 10859 |
+
"step": 61000
|
| 10860 |
+
},
|
| 10861 |
+
{
|
| 10862 |
+
"epoch": 0.4103164313232873,
|
| 10863 |
+
"eval_loss": 2.913696765899658,
|
| 10864 |
+
"eval_runtime": 52.0924,
|
| 10865 |
+
"eval_samples_per_second": 95.983,
|
| 10866 |
+
"eval_steps_per_second": 23.996,
|
| 10867 |
+
"num_input_tokens_seen": 15990784000,
|
| 10868 |
+
"step": 61000
|
| 10869 |
}
|
| 10870 |
],
|
| 10871 |
"logging_steps": 50,
|
| 10872 |
+
"max_steps": 70000,
|
| 10873 |
+
"num_input_tokens_seen": 15990784000,
|
| 10874 |
"num_train_epochs": 1,
|
| 10875 |
"save_steps": 1000,
|
| 10876 |
"stateful_callbacks": {
|
|
|
|
| 10880 |
"should_evaluate": false,
|
| 10881 |
"should_log": false,
|
| 10882 |
"should_save": true,
|
| 10883 |
+
"should_training_stop": false
|
| 10884 |
},
|
| 10885 |
"attributes": {}
|
| 10886 |
}
|
| 10887 |
},
|
| 10888 |
+
"total_flos": 4.27769079005184e+18,
|
| 10889 |
"train_batch_size": 64,
|
| 10890 |
"trial_name": null,
|
| 10891 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a964c444482261d405cae313adc306063a7a31a0cff9e89a43e151d806eeee7e
|
| 3 |
size 6008
|