Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9da6829b1edfacc61441699b4ac6d5dc6abb737be9152be8f29e5862abecd54
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d9c41bcb1f7e3d0ff7cf1e9246c52eba5532bd32a5af7bbe5d88c8501561fc3
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0f02b717c272316648da49ca6391d63601d6d8e37a3b73ce0655aa44e0b1efd
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53471871a37f3cc35b4a656a6f0cfda18046c304a91d9bf8b29b14eea2ccc156
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6817,6 +6817,364 @@
|
|
| 6817 |
"eval_samples_per_second": 129.654,
|
| 6818 |
"eval_steps_per_second": 2.723,
|
| 6819 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6820 |
}
|
| 6821 |
],
|
| 6822 |
"logging_steps": 10,
|
|
@@ -6836,7 +7194,7 @@
|
|
| 6836 |
"attributes": {}
|
| 6837 |
}
|
| 6838 |
},
|
| 6839 |
-
"total_flos": 3.
|
| 6840 |
"train_batch_size": 48,
|
| 6841 |
"trial_name": null,
|
| 6842 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.6894745734076704,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 10000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6817 |
"eval_samples_per_second": 129.654,
|
| 6818 |
"eval_steps_per_second": 2.723,
|
| 6819 |
"step": 9500
|
| 6820 |
+
},
|
| 6821 |
+
{
|
| 6822 |
+
"epoch": 1.6066903193106943,
|
| 6823 |
+
"grad_norm": 0.4606820344924927,
|
| 6824 |
+
"learning_rate": 3.960727047894527e-05,
|
| 6825 |
+
"loss": 4.359199523925781,
|
| 6826 |
+
"step": 9510
|
| 6827 |
+
},
|
| 6828 |
+
{
|
| 6829 |
+
"epoch": 1.608379793884102,
|
| 6830 |
+
"grad_norm": 0.48804476857185364,
|
| 6831 |
+
"learning_rate": 3.928353538569023e-05,
|
| 6832 |
+
"loss": 4.32340087890625,
|
| 6833 |
+
"step": 9520
|
| 6834 |
+
},
|
| 6835 |
+
{
|
| 6836 |
+
"epoch": 1.6100692684575098,
|
| 6837 |
+
"grad_norm": 0.4648666977882385,
|
| 6838 |
+
"learning_rate": 3.8960929302853074e-05,
|
| 6839 |
+
"loss": 4.31898078918457,
|
| 6840 |
+
"step": 9530
|
| 6841 |
+
},
|
| 6842 |
+
{
|
| 6843 |
+
"epoch": 1.6117587430309173,
|
| 6844 |
+
"grad_norm": 0.48212724924087524,
|
| 6845 |
+
"learning_rate": 3.863945552014892e-05,
|
| 6846 |
+
"loss": 4.320017242431641,
|
| 6847 |
+
"step": 9540
|
| 6848 |
+
},
|
| 6849 |
+
{
|
| 6850 |
+
"epoch": 1.6134482176043252,
|
| 6851 |
+
"grad_norm": 0.46979817748069763,
|
| 6852 |
+
"learning_rate": 3.831911731574648e-05,
|
| 6853 |
+
"loss": 4.365304946899414,
|
| 6854 |
+
"step": 9550
|
| 6855 |
+
},
|
| 6856 |
+
{
|
| 6857 |
+
"epoch": 1.6151376921777327,
|
| 6858 |
+
"grad_norm": 0.47188496589660645,
|
| 6859 |
+
"learning_rate": 3.799991795623471e-05,
|
| 6860 |
+
"loss": 4.329359817504883,
|
| 6861 |
+
"step": 9560
|
| 6862 |
+
},
|
| 6863 |
+
{
|
| 6864 |
+
"epoch": 1.6168271667511402,
|
| 6865 |
+
"grad_norm": 0.47442197799682617,
|
| 6866 |
+
"learning_rate": 3.7681860696589216e-05,
|
| 6867 |
+
"loss": 4.333200836181641,
|
| 6868 |
+
"step": 9570
|
| 6869 |
+
},
|
| 6870 |
+
{
|
| 6871 |
+
"epoch": 1.6185166413245482,
|
| 6872 |
+
"grad_norm": 0.46460849046707153,
|
| 6873 |
+
"learning_rate": 3.7364948780139344e-05,
|
| 6874 |
+
"loss": 4.2955772399902346,
|
| 6875 |
+
"step": 9580
|
| 6876 |
+
},
|
| 6877 |
+
{
|
| 6878 |
+
"epoch": 1.6202061158979557,
|
| 6879 |
+
"grad_norm": 0.4687038064002991,
|
| 6880 |
+
"learning_rate": 3.70491854385351e-05,
|
| 6881 |
+
"loss": 4.287596893310547,
|
| 6882 |
+
"step": 9590
|
| 6883 |
+
},
|
| 6884 |
+
{
|
| 6885 |
+
"epoch": 1.6218955904713634,
|
| 6886 |
+
"grad_norm": 0.4717998802661896,
|
| 6887 |
+
"learning_rate": 3.673457389171401e-05,
|
| 6888 |
+
"loss": 4.3026374816894535,
|
| 6889 |
+
"step": 9600
|
| 6890 |
+
},
|
| 6891 |
+
{
|
| 6892 |
+
"epoch": 1.6235850650447712,
|
| 6893 |
+
"grad_norm": 0.47237226366996765,
|
| 6894 |
+
"learning_rate": 3.642111734786833e-05,
|
| 6895 |
+
"loss": 4.3385662078857425,
|
| 6896 |
+
"step": 9610
|
| 6897 |
+
},
|
| 6898 |
+
{
|
| 6899 |
+
"epoch": 1.6252745396181787,
|
| 6900 |
+
"grad_norm": 0.48337623476982117,
|
| 6901 |
+
"learning_rate": 3.610881900341261e-05,
|
| 6902 |
+
"loss": 4.29266357421875,
|
| 6903 |
+
"step": 9620
|
| 6904 |
+
},
|
| 6905 |
+
{
|
| 6906 |
+
"epoch": 1.6269640141915864,
|
| 6907 |
+
"grad_norm": 0.46639102697372437,
|
| 6908 |
+
"learning_rate": 3.579768204295063e-05,
|
| 6909 |
+
"loss": 4.3327476501464846,
|
| 6910 |
+
"step": 9630
|
| 6911 |
+
},
|
| 6912 |
+
{
|
| 6913 |
+
"epoch": 1.6286534887649942,
|
| 6914 |
+
"grad_norm": 0.4697898030281067,
|
| 6915 |
+
"learning_rate": 3.54877096392434e-05,
|
| 6916 |
+
"loss": 4.336753463745117,
|
| 6917 |
+
"step": 9640
|
| 6918 |
+
},
|
| 6919 |
+
{
|
| 6920 |
+
"epoch": 1.6303429633384017,
|
| 6921 |
+
"grad_norm": 0.46316251158714294,
|
| 6922 |
+
"learning_rate": 3.5178904953176354e-05,
|
| 6923 |
+
"loss": 4.306925964355469,
|
| 6924 |
+
"step": 9650
|
| 6925 |
+
},
|
| 6926 |
+
{
|
| 6927 |
+
"epoch": 1.6320324379118094,
|
| 6928 |
+
"grad_norm": 0.4708452820777893,
|
| 6929 |
+
"learning_rate": 3.487127113372755e-05,
|
| 6930 |
+
"loss": 4.326674270629883,
|
| 6931 |
+
"step": 9660
|
| 6932 |
+
},
|
| 6933 |
+
{
|
| 6934 |
+
"epoch": 1.6337219124852171,
|
| 6935 |
+
"grad_norm": 0.4727766811847687,
|
| 6936 |
+
"learning_rate": 3.4564811317935235e-05,
|
| 6937 |
+
"loss": 4.304772186279297,
|
| 6938 |
+
"step": 9670
|
| 6939 |
+
},
|
| 6940 |
+
{
|
| 6941 |
+
"epoch": 1.6354113870586247,
|
| 6942 |
+
"grad_norm": 0.47584787011146545,
|
| 6943 |
+
"learning_rate": 3.4259528630865995e-05,
|
| 6944 |
+
"loss": 4.3285400390625,
|
| 6945 |
+
"step": 9680
|
| 6946 |
+
},
|
| 6947 |
+
{
|
| 6948 |
+
"epoch": 1.6371008616320324,
|
| 6949 |
+
"grad_norm": 0.4718579947948456,
|
| 6950 |
+
"learning_rate": 3.3955426185582826e-05,
|
| 6951 |
+
"loss": 4.310879135131836,
|
| 6952 |
+
"step": 9690
|
| 6953 |
+
},
|
| 6954 |
+
{
|
| 6955 |
+
"epoch": 1.6387903362054401,
|
| 6956 |
+
"grad_norm": 0.466880738735199,
|
| 6957 |
+
"learning_rate": 3.365250708311352e-05,
|
| 6958 |
+
"loss": 4.325877380371094,
|
| 6959 |
+
"step": 9700
|
| 6960 |
+
},
|
| 6961 |
+
{
|
| 6962 |
+
"epoch": 1.6404798107788476,
|
| 6963 |
+
"grad_norm": 0.46377378702163696,
|
| 6964 |
+
"learning_rate": 3.335077441241895e-05,
|
| 6965 |
+
"loss": 4.307848358154297,
|
| 6966 |
+
"step": 9710
|
| 6967 |
+
},
|
| 6968 |
+
{
|
| 6969 |
+
"epoch": 1.6421692853522556,
|
| 6970 |
+
"grad_norm": 0.718170166015625,
|
| 6971 |
+
"learning_rate": 3.305023125036148e-05,
|
| 6972 |
+
"loss": 4.313734436035157,
|
| 6973 |
+
"step": 9720
|
| 6974 |
+
},
|
| 6975 |
+
{
|
| 6976 |
+
"epoch": 1.643858759925663,
|
| 6977 |
+
"grad_norm": 0.463375985622406,
|
| 6978 |
+
"learning_rate": 3.275088066167369e-05,
|
| 6979 |
+
"loss": 4.3089752197265625,
|
| 6980 |
+
"step": 9730
|
| 6981 |
+
},
|
| 6982 |
+
{
|
| 6983 |
+
"epoch": 1.6455482344990708,
|
| 6984 |
+
"grad_norm": 0.47580841183662415,
|
| 6985 |
+
"learning_rate": 3.245272569892727e-05,
|
| 6986 |
+
"loss": 4.3522186279296875,
|
| 6987 |
+
"step": 9740
|
| 6988 |
+
},
|
| 6989 |
+
{
|
| 6990 |
+
"epoch": 1.6472377090724786,
|
| 6991 |
+
"grad_norm": 0.46081092953681946,
|
| 6992 |
+
"learning_rate": 3.215576940250155e-05,
|
| 6993 |
+
"loss": 4.3113548278808596,
|
| 6994 |
+
"step": 9750
|
| 6995 |
+
},
|
| 6996 |
+
{
|
| 6997 |
+
"epoch": 1.648927183645886,
|
| 6998 |
+
"grad_norm": 0.47329118847846985,
|
| 6999 |
+
"learning_rate": 3.1860014800552734e-05,
|
| 7000 |
+
"loss": 4.3111930847167965,
|
| 7001 |
+
"step": 9760
|
| 7002 |
+
},
|
| 7003 |
+
{
|
| 7004 |
+
"epoch": 1.6506166582192938,
|
| 7005 |
+
"grad_norm": 0.4813630282878876,
|
| 7006 |
+
"learning_rate": 3.15654649089831e-05,
|
| 7007 |
+
"loss": 4.312236404418945,
|
| 7008 |
+
"step": 9770
|
| 7009 |
+
},
|
| 7010 |
+
{
|
| 7011 |
+
"epoch": 1.6523061327927016,
|
| 7012 |
+
"grad_norm": 0.5134222507476807,
|
| 7013 |
+
"learning_rate": 3.1272122731409916e-05,
|
| 7014 |
+
"loss": 4.3267356872558596,
|
| 7015 |
+
"step": 9780
|
| 7016 |
+
},
|
| 7017 |
+
{
|
| 7018 |
+
"epoch": 1.653995607366109,
|
| 7019 |
+
"grad_norm": 0.4687715768814087,
|
| 7020 |
+
"learning_rate": 3.097999125913518e-05,
|
| 7021 |
+
"loss": 4.311066055297852,
|
| 7022 |
+
"step": 9790
|
| 7023 |
+
},
|
| 7024 |
+
{
|
| 7025 |
+
"epoch": 1.6556850819395168,
|
| 7026 |
+
"grad_norm": 0.4736403524875641,
|
| 7027 |
+
"learning_rate": 3.068907347111485e-05,
|
| 7028 |
+
"loss": 4.3107654571533205,
|
| 7029 |
+
"step": 9800
|
| 7030 |
+
},
|
| 7031 |
+
{
|
| 7032 |
+
"epoch": 1.6573745565129245,
|
| 7033 |
+
"grad_norm": 0.4813496172428131,
|
| 7034 |
+
"learning_rate": 3.0399372333928644e-05,
|
| 7035 |
+
"loss": 4.314376449584961,
|
| 7036 |
+
"step": 9810
|
| 7037 |
+
},
|
| 7038 |
+
{
|
| 7039 |
+
"epoch": 1.659064031086332,
|
| 7040 |
+
"grad_norm": 0.49036741256713867,
|
| 7041 |
+
"learning_rate": 3.0110890801749627e-05,
|
| 7042 |
+
"loss": 4.307826995849609,
|
| 7043 |
+
"step": 9820
|
| 7044 |
+
},
|
| 7045 |
+
{
|
| 7046 |
+
"epoch": 1.6607535056597398,
|
| 7047 |
+
"grad_norm": 0.4669703543186188,
|
| 7048 |
+
"learning_rate": 2.982363181631418e-05,
|
| 7049 |
+
"loss": 4.303530883789063,
|
| 7050 |
+
"step": 9830
|
| 7051 |
+
},
|
| 7052 |
+
{
|
| 7053 |
+
"epoch": 1.6624429802331475,
|
| 7054 |
+
"grad_norm": 0.4788713753223419,
|
| 7055 |
+
"learning_rate": 2.9537598306892103e-05,
|
| 7056 |
+
"loss": 4.308844375610351,
|
| 7057 |
+
"step": 9840
|
| 7058 |
+
},
|
| 7059 |
+
{
|
| 7060 |
+
"epoch": 1.664132454806555,
|
| 7061 |
+
"grad_norm": 0.5307414531707764,
|
| 7062 |
+
"learning_rate": 2.9252793190256447e-05,
|
| 7063 |
+
"loss": 4.285565567016602,
|
| 7064 |
+
"step": 9850
|
| 7065 |
+
},
|
| 7066 |
+
{
|
| 7067 |
+
"epoch": 1.665821929379963,
|
| 7068 |
+
"grad_norm": 0.4659578502178192,
|
| 7069 |
+
"learning_rate": 2.896921937065419e-05,
|
| 7070 |
+
"loss": 4.313910675048828,
|
| 7071 |
+
"step": 9860
|
| 7072 |
+
},
|
| 7073 |
+
{
|
| 7074 |
+
"epoch": 1.6675114039533705,
|
| 7075 |
+
"grad_norm": 0.46300381422042847,
|
| 7076 |
+
"learning_rate": 2.8686879739776137e-05,
|
| 7077 |
+
"loss": 4.31811408996582,
|
| 7078 |
+
"step": 9870
|
| 7079 |
+
},
|
| 7080 |
+
{
|
| 7081 |
+
"epoch": 1.669200878526778,
|
| 7082 |
+
"grad_norm": 0.4717971086502075,
|
| 7083 |
+
"learning_rate": 2.8405777176727924e-05,
|
| 7084 |
+
"loss": 4.318044662475586,
|
| 7085 |
+
"step": 9880
|
| 7086 |
+
},
|
| 7087 |
+
{
|
| 7088 |
+
"epoch": 1.670890353100186,
|
| 7089 |
+
"grad_norm": 0.45347994565963745,
|
| 7090 |
+
"learning_rate": 2.8125914548000243e-05,
|
| 7091 |
+
"loss": 4.295824432373047,
|
| 7092 |
+
"step": 9890
|
| 7093 |
+
},
|
| 7094 |
+
{
|
| 7095 |
+
"epoch": 1.6725798276735935,
|
| 7096 |
+
"grad_norm": 0.4703952670097351,
|
| 7097 |
+
"learning_rate": 2.7847294707439828e-05,
|
| 7098 |
+
"loss": 4.28874626159668,
|
| 7099 |
+
"step": 9900
|
| 7100 |
+
},
|
| 7101 |
+
{
|
| 7102 |
+
"epoch": 1.6742693022470012,
|
| 7103 |
+
"grad_norm": 0.4726548194885254,
|
| 7104 |
+
"learning_rate": 2.7569920496220398e-05,
|
| 7105 |
+
"loss": 4.304931259155273,
|
| 7106 |
+
"step": 9910
|
| 7107 |
+
},
|
| 7108 |
+
{
|
| 7109 |
+
"epoch": 1.675958776820409,
|
| 7110 |
+
"grad_norm": 0.47394225001335144,
|
| 7111 |
+
"learning_rate": 2.729379474281352e-05,
|
| 7112 |
+
"loss": 4.3050182342529295,
|
| 7113 |
+
"step": 9920
|
| 7114 |
+
},
|
| 7115 |
+
{
|
| 7116 |
+
"epoch": 1.6776482513938165,
|
| 7117 |
+
"grad_norm": 0.49833500385284424,
|
| 7118 |
+
"learning_rate": 2.701892026295979e-05,
|
| 7119 |
+
"loss": 4.331858062744141,
|
| 7120 |
+
"step": 9930
|
| 7121 |
+
},
|
| 7122 |
+
{
|
| 7123 |
+
"epoch": 1.6793377259672242,
|
| 7124 |
+
"grad_norm": 0.4709710478782654,
|
| 7125 |
+
"learning_rate": 2.6745299859640318e-05,
|
| 7126 |
+
"loss": 4.332807159423828,
|
| 7127 |
+
"step": 9940
|
| 7128 |
+
},
|
| 7129 |
+
{
|
| 7130 |
+
"epoch": 1.681027200540632,
|
| 7131 |
+
"grad_norm": 0.48379939794540405,
|
| 7132 |
+
"learning_rate": 2.6472936323047972e-05,
|
| 7133 |
+
"loss": 4.311476516723633,
|
| 7134 |
+
"step": 9950
|
| 7135 |
+
},
|
| 7136 |
+
{
|
| 7137 |
+
"epoch": 1.6827166751140394,
|
| 7138 |
+
"grad_norm": 0.475941926240921,
|
| 7139 |
+
"learning_rate": 2.6201832430558866e-05,
|
| 7140 |
+
"loss": 4.314311599731445,
|
| 7141 |
+
"step": 9960
|
| 7142 |
+
},
|
| 7143 |
+
{
|
| 7144 |
+
"epoch": 1.6844061496874472,
|
| 7145 |
+
"grad_norm": 0.4633561372756958,
|
| 7146 |
+
"learning_rate": 2.5931990946704206e-05,
|
| 7147 |
+
"loss": 4.312783050537109,
|
| 7148 |
+
"step": 9970
|
| 7149 |
+
},
|
| 7150 |
+
{
|
| 7151 |
+
"epoch": 1.686095624260855,
|
| 7152 |
+
"grad_norm": 0.4624374806880951,
|
| 7153 |
+
"learning_rate": 2.5663414623141943e-05,
|
| 7154 |
+
"loss": 4.315936279296875,
|
| 7155 |
+
"step": 9980
|
| 7156 |
+
},
|
| 7157 |
+
{
|
| 7158 |
+
"epoch": 1.6877850988342624,
|
| 7159 |
+
"grad_norm": 0.46104687452316284,
|
| 7160 |
+
"learning_rate": 2.5396106198628947e-05,
|
| 7161 |
+
"loss": 4.317576217651367,
|
| 7162 |
+
"step": 9990
|
| 7163 |
+
},
|
| 7164 |
+
{
|
| 7165 |
+
"epoch": 1.6894745734076704,
|
| 7166 |
+
"grad_norm": 0.46486878395080566,
|
| 7167 |
+
"learning_rate": 2.5130068398992716e-05,
|
| 7168 |
+
"loss": 4.3148681640625,
|
| 7169 |
+
"step": 10000
|
| 7170 |
+
},
|
| 7171 |
+
{
|
| 7172 |
+
"epoch": 1.6894745734076704,
|
| 7173 |
+
"eval_loss": 4.282918930053711,
|
| 7174 |
+
"eval_runtime": 3.8826,
|
| 7175 |
+
"eval_samples_per_second": 257.563,
|
| 7176 |
+
"eval_steps_per_second": 5.409,
|
| 7177 |
+
"step": 10000
|
| 7178 |
}
|
| 7179 |
],
|
| 7180 |
"logging_steps": 10,
|
|
|
|
| 7194 |
"attributes": {}
|
| 7195 |
}
|
| 7196 |
},
|
| 7197 |
+
"total_flos": 3.344547305037496e+17,
|
| 7198 |
"train_batch_size": 48,
|
| 7199 |
"trial_name": null,
|
| 7200 |
"trial_params": null
|