mohammadmahdinouri committed
Commit c9f29c5 · verified · 1 Parent(s): c6b76bc

Training in progress, step 76000, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a67b2c60c5b42d0ad22d6b38771528b94fc53ceec628d0597d6fa521952a684c
+oid sha256:cc06a7f089f926af24a4dafd2fc5c68a00957b0501ae37664b3613577e08b3af
 size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6916c88bb66f81e6f1308f6aadeffdb932cc73012f17c967d2f81582f0d6ec4
+oid sha256:9513e34a098ccfc4d0eeceb95099c5472b4ff0a71cffb25d876aad974cab2486
 size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1caa66015d3956d30ec507257de058a8c2fd4bde8e3572a38d393062e23e25fa
+oid sha256:c3691082114682896d0f28ee5b4c8f41d4639d4efe6c895c755146048ab7c832
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43374ebce165dffb63c7f0a02b8a1fb69d9d2182c0805086854a706ff35de8db
+oid sha256:6c20256e223141e6700101ae515de5a6287d380eaea8a4346e1c56536ce67dcb
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23f5704701def73bff9de54ed2bc9c44e464b4fd7bf79cf9e15b571b97700de5
+oid sha256:a5392970d44236e7a431111e07b6640793728800da16c52d461401ef3040338a
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2ec98dcfff897ba38371ec424fd9cb0533d296496a8ad5f5af6ba3e2b631320
+oid sha256:756f7cda01c1bba0353fe356cfd74ccb32f9626ff6708219372ea8a4c1ba35dc
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83d418122fbb0fa369cfecb2f66848d24fc6c35ef433b91965b2ecce9163409e
+oid sha256:178aedfc2920966f379dc01376957ddc00b6df6f84cf67e8abd741361412b63d
 size 1064
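
Each pointer diff above follows the Git LFS pointer-file layout (version line, oid sha256 line, size line in bytes, per https://git-lfs.github.com/spec/v1). Only the oid changes between the step-75000 and step-76000 checkpoints; the sizes stay identical, presumably because the serialized tensors keep the same shapes. As a minimal illustrative sketch only (not part of this repository; parse_lfs_pointer is a made-up helper name), such a pointer can be read like this:

# Sketch: parse a Git LFS pointer file of the form shown in the diffs above.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")  # each line is "key value"
        fields[key] = value
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:cc06a7f089f926af24a4dafd2fc5c68a00957b0501ae37664b3613577e08b3af\n"
    "size 304481530\n"
)
info = parse_lfs_pointer(pointer)
print(info["oid"])        # sha256:cc06a7f0...
print(int(info["size"]))  # 304481530 bytes (~290 MiB)
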
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.11109860223145246,
+  "epoch": 0.11257991692787182,
   "eval_steps": 500,
-  "global_step": 75000,
+  "global_step": 76000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -26258,6 +26258,356 @@
       "learning_rate": 0.00048160271404356535,
       "loss": 16.4454,
       "step": 75000
+    },
+    {
+      "epoch": 0.11112822852538085,
+      "grad_norm": 5.6875,
+      "learning_rate": 0.0004815977751085578,
+      "loss": 16.438,
+      "step": 75020
+    },
+    {
+      "epoch": 0.11115785481930923,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004815928361735502,
+      "loss": 16.4092,
+      "step": 75040
+    },
+    {
+      "epoch": 0.11118748111323762,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815878972385427,
+      "loss": 16.364,
+      "step": 75060
+    },
+    {
+      "epoch": 0.111217107407166,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004815829583035351,
+      "loss": 16.3725,
+      "step": 75080
+    },
+    {
+      "epoch": 0.11124673370109439,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048157801936852754,
+      "loss": 16.4758,
+      "step": 75100
+    },
+    {
+      "epoch": 0.11127635999502278,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.00048157308043352,
+      "loss": 16.3516,
+      "step": 75120
+    },
+    {
+      "epoch": 0.11130598628895116,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048156814149851243,
+      "loss": 16.4029,
+      "step": 75140
+    },
+    {
+      "epoch": 0.11133561258287955,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815632025635048,
+      "loss": 16.4283,
+      "step": 75160
+    },
+    {
+      "epoch": 0.11136523887680795,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815582636284973,
+      "loss": 16.3905,
+      "step": 75180
+    },
+    {
+      "epoch": 0.11139486517073634,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004815533246934897,
+      "loss": 16.3731,
+      "step": 75200
+    },
+    {
+      "epoch": 0.11142449146466472,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048154838575848217,
+      "loss": 16.4243,
+      "step": 75220
+    },
+    {
+      "epoch": 0.11145411775859311,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048154344682347456,
+      "loss": 16.3272,
+      "step": 75240
+    },
+    {
+      "epoch": 0.1114837440525215,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048153850788846696,
+      "loss": 16.3074,
+      "step": 75260
+    },
+    {
+      "epoch": 0.11151337034644988,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.00048153356895345946,
+      "loss": 16.4066,
+      "step": 75280
+    },
+    {
+      "epoch": 0.11154299664037827,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048152863001845185,
+      "loss": 16.3641,
+      "step": 75300
+    },
+    {
+      "epoch": 0.11157262293430666,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004815236910834443,
+      "loss": 16.3674,
+      "step": 75320
+    },
+    {
+      "epoch": 0.11160224922823504,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004815187521484367,
+      "loss": 16.3299,
+      "step": 75340
+    },
+    {
+      "epoch": 0.11163187552216343,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004815138132134292,
+      "loss": 16.4177,
+      "step": 75360
+    },
+    {
+      "epoch": 0.11166150181609182,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004815088742784216,
+      "loss": 16.3523,
+      "step": 75380
+    },
+    {
+      "epoch": 0.1116911281100202,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048150393534341404,
+      "loss": 16.3449,
+      "step": 75400
+    },
+    {
+      "epoch": 0.11172075440394859,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004814989964084065,
+      "loss": 16.3556,
+      "step": 75420
+    },
+    {
+      "epoch": 0.11175038069787697,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048149405747339893,
+      "loss": 16.4188,
+      "step": 75440
+    },
+    {
+      "epoch": 0.11178000699180536,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004814891185383913,
+      "loss": 16.3877,
+      "step": 75460
+    },
+    {
+      "epoch": 0.11180963328573375,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004814841796033838,
+      "loss": 16.3696,
+      "step": 75480
+    },
+    {
+      "epoch": 0.11183925957966215,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004814792406683762,
+      "loss": 16.3543,
+      "step": 75500
+    },
+    {
+      "epoch": 0.11186888587359053,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048147430173336867,
+      "loss": 16.4576,
+      "step": 75520
+    },
+    {
+      "epoch": 0.11189851216751892,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048146936279836106,
+      "loss": 16.3673,
+      "step": 75540
+    },
+    {
+      "epoch": 0.11192813846144731,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004814644238633535,
+      "loss": 16.4263,
+      "step": 75560
+    },
+    {
+      "epoch": 0.1119577647553757,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048145948492834596,
+      "loss": 16.3437,
+      "step": 75580
+    },
+    {
+      "epoch": 0.11198739104930408,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048145454599333835,
+      "loss": 16.3618,
+      "step": 75600
+    },
+    {
+      "epoch": 0.11201701734323247,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004814496070583308,
+      "loss": 16.3418,
+      "step": 75620
+    },
+    {
+      "epoch": 0.11204664363716085,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004814446681233232,
+      "loss": 16.3916,
+      "step": 75640
+    },
+    {
+      "epoch": 0.11207626993108924,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004814397291883157,
+      "loss": 16.3521,
+      "step": 75660
+    },
+    {
+      "epoch": 0.11210589622501763,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004814347902533081,
+      "loss": 16.3464,
+      "step": 75680
+    },
+    {
+      "epoch": 0.11213552251894601,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048142985131830054,
+      "loss": 16.3965,
+      "step": 75700
+    },
+    {
+      "epoch": 0.1121651488128744,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.000481424912383293,
+      "loss": 16.379,
+      "step": 75720
+    },
+    {
+      "epoch": 0.11219477510680279,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048141997344828543,
+      "loss": 16.3858,
+      "step": 75740
+    },
+    {
+      "epoch": 0.11222440140073117,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004814150345132778,
+      "loss": 16.3445,
+      "step": 75760
+    },
+    {
+      "epoch": 0.11225402769465956,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004814100955782703,
+      "loss": 16.2839,
+      "step": 75780
+    },
+    {
+      "epoch": 0.11228365398858794,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004814051566432627,
+      "loss": 16.3684,
+      "step": 75800
+    },
+    {
+      "epoch": 0.11231328028251635,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048140021770825517,
+      "loss": 16.3344,
+      "step": 75820
+    },
+    {
+      "epoch": 0.11234290657644473,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048139527877324756,
+      "loss": 16.3251,
+      "step": 75840
+    },
+    {
+      "epoch": 0.11237253287037312,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048139033983824,
+      "loss": 16.3266,
+      "step": 75860
+    },
+    {
+      "epoch": 0.1124021591643015,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048138540090323246,
+      "loss": 16.3339,
+      "step": 75880
+    },
+    {
+      "epoch": 0.11243178545822989,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004813804619682249,
+      "loss": 16.3593,
+      "step": 75900
+    },
+    {
+      "epoch": 0.11246141175215828,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004813755230332173,
+      "loss": 16.3293,
+      "step": 75920
+    },
+    {
+      "epoch": 0.11249103804608666,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004813705840982097,
+      "loss": 16.3553,
+      "step": 75940
+    },
+    {
+      "epoch": 0.11252066434001505,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004813656451632022,
+      "loss": 16.375,
+      "step": 75960
+    },
+    {
+      "epoch": 0.11255029063394344,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004813607062281946,
+      "loss": 16.3077,
+      "step": 75980
+    },
+    {
+      "epoch": 0.11257991692787182,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048135576729318704,
+      "loss": 16.3442,
+      "step": 76000
     }
   ],
   "logging_steps": 20,
@@ -26277,7 +26627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.514317317520595e+19,
+  "total_flos": 5.587853722915124e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null