Training in progress, step 3000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +155 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9eff5ee207bd67a332529e94c498f20dca4ade4b14ddb6e802baca50011cd67
 size 5517243408

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f6dbe798832e342a67190d44dceef80bd78777804e88c221e3afe22c3917a08
 size 5517243408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:165a108047674c268721b9f66f363eeb5e7a212dd9c8fa135cf606f83c14ae9d
 size 8984377658

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7284ca750912e2bdb6f4c871990e72df084d5c36a94af05687833beb962f143
 size 8984377658

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:903a92009048b799b57a07de5b0b1e00ddd8f3ce27313ee0f905a7b0c8a563fb
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f1a4433fcead82ba39559404307b112f4eb74f11934440fc8d2e4a1f5d92376
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24d93be1fd2153b773b930d26cadd2c1619498d1049cd40be5d0e37a4e1e8017
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5c3f157259c65d7b9df5f7d522deadeb86b0679070a3309e3b18e333bffea53
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 1500,
   "best_metric": 0.02954169362783432,
   "best_model_checkpoint": "/content/mbart-model/checkpoint-1500",
-  "epoch": 4.139072847682119,
   "eval_steps": 500,
-  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -758,6 +758,156 @@
       "eval_samples_per_second": 25.34,
       "eval_steps_per_second": 1.588,
       "step": 2500
     }
   ],
   "logging_steps": 25,
@@ -772,7 +922,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -781,12 +931,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.739772456534016e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 1500,
   "best_metric": 0.02954169362783432,
   "best_model_checkpoint": "/content/mbart-model/checkpoint-1500",
+  "epoch": 4.966887417218543,
   "eval_steps": 500,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.34,
       "eval_steps_per_second": 1.588,
       "step": 2500
+    },
+    {
+      "epoch": 4.180463576158941,
+      "grad_norm": 0.10778629779815674,
+      "learning_rate": 9.841269841269842e-06,
+      "loss": 0.007042064070701599,
+      "step": 2525
+    },
+    {
+      "epoch": 4.2218543046357615,
+      "grad_norm": 0.108833447098732,
+      "learning_rate": 9.345238095238096e-06,
+      "loss": 0.00790070116519928,
+      "step": 2550
+    },
+    {
+      "epoch": 4.263245033112582,
+      "grad_norm": 0.18365369737148285,
+      "learning_rate": 8.84920634920635e-06,
+      "loss": 0.006773302555084229,
+      "step": 2575
+    },
+    {
+      "epoch": 4.304635761589404,
+      "grad_norm": 0.1245948076248169,
+      "learning_rate": 8.353174603174603e-06,
+      "loss": 0.007275177240371704,
+      "step": 2600
+    },
+    {
+      "epoch": 4.346026490066225,
+      "grad_norm": 0.14145947992801666,
+      "learning_rate": 7.857142857142858e-06,
+      "loss": 0.0075223612785339355,
+      "step": 2625
+    },
+    {
+      "epoch": 4.387417218543046,
+      "grad_norm": 0.1652764230966568,
+      "learning_rate": 7.361111111111112e-06,
+      "loss": 0.006786306500434876,
+      "step": 2650
+    },
+    {
+      "epoch": 4.428807947019868,
+      "grad_norm": 0.09350095689296722,
+      "learning_rate": 6.865079365079366e-06,
+      "loss": 0.006964877843856811,
+      "step": 2675
+    },
+    {
+      "epoch": 4.470198675496689,
+      "grad_norm": 0.11477820575237274,
+      "learning_rate": 6.369047619047619e-06,
+      "loss": 0.006648789644241333,
+      "step": 2700
+    },
+    {
+      "epoch": 4.51158940397351,
+      "grad_norm": 0.16006991267204285,
+      "learning_rate": 5.873015873015873e-06,
+      "loss": 0.007510648965835571,
+      "step": 2725
+    },
+    {
+      "epoch": 4.552980132450331,
+      "grad_norm": 0.08736822754144669,
+      "learning_rate": 5.3769841269841275e-06,
+      "loss": 0.00650223195552826,
+      "step": 2750
+    },
+    {
+      "epoch": 4.594370860927152,
+      "grad_norm": 0.11176948249340057,
+      "learning_rate": 4.880952380952381e-06,
+      "loss": 0.007162246108055115,
+      "step": 2775
+    },
+    {
+      "epoch": 4.635761589403973,
+      "grad_norm": 0.1488288938999176,
+      "learning_rate": 4.3849206349206344e-06,
+      "loss": 0.006841970086097718,
+      "step": 2800
+    },
+    {
+      "epoch": 4.677152317880795,
+      "grad_norm": 0.11193964630365372,
+      "learning_rate": 3.888888888888889e-06,
+      "loss": 0.006939524412155151,
+      "step": 2825
+    },
+    {
+      "epoch": 4.718543046357616,
+      "grad_norm": 0.12787118554115295,
+      "learning_rate": 3.3928571428571426e-06,
+      "loss": 0.006942141056060791,
+      "step": 2850
+    },
+    {
+      "epoch": 4.759933774834437,
+      "grad_norm": 0.17415784299373627,
+      "learning_rate": 2.896825396825397e-06,
+      "loss": 0.007073127031326294,
+      "step": 2875
+    },
+    {
+      "epoch": 4.801324503311259,
+      "grad_norm": 0.11148407310247421,
+      "learning_rate": 2.4007936507936512e-06,
+      "loss": 0.007078287005424499,
+      "step": 2900
+    },
+    {
+      "epoch": 4.8427152317880795,
+      "grad_norm": 0.07541561126708984,
+      "learning_rate": 1.9047619047619051e-06,
+      "loss": 0.00713414192199707,
+      "step": 2925
+    },
+    {
+      "epoch": 4.8841059602649,
+      "grad_norm": 0.09067052602767944,
+      "learning_rate": 1.4087301587301588e-06,
+      "loss": 0.006777424216270447,
+      "step": 2950
+    },
+    {
+      "epoch": 4.925496688741722,
+      "grad_norm": 0.1183658018708229,
+      "learning_rate": 9.126984126984128e-07,
+      "loss": 0.006296271085739135,
+      "step": 2975
+    },
+    {
+      "epoch": 4.966887417218543,
+      "grad_norm": 0.1548430323600769,
+      "learning_rate": 4.1666666666666667e-07,
+      "loss": 0.006506852507591247,
+      "step": 3000
+    },
+    {
+      "epoch": 4.966887417218543,
+      "eval_bleu": 64.74067661751941,
+      "eval_exact_match": 0.4607026439695762,
+      "eval_loss": 0.03365711122751236,
+      "eval_runtime": 101.7926,
+      "eval_samples_per_second": 27.124,
+      "eval_steps_per_second": 1.7,
+      "step": 3000
     }
   ],
   "logging_steps": 25,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.487839157846016e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null