Update model

Files changed (4) hide show

config.json CHANGED Viewed

@@ -20,6 +20,7 @@
   "num_layers": 6,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_num_buckets": 32,
   "task_specific_params": {
     "summarization": {
@@ -51,7 +52,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.17.0",
   "use_cache": true,
   "vocab_size": 32128
 }

   "num_layers": 6,
   "output_past": true,
   "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
   "task_specific_params": {
     "summarization": {
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.18.0",
   "use_cache": true,
   "vocab_size": 32128
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d508d86071ac3db833a5e088b980356c4982644b23a731d31731204756ce7b9d
-size 242085627

 version https://git-lfs.github.com/spec/v1
+oid sha256:2904f5e412ff63d45b4e98976027f57a15f4c1bfc562fde4ca7ab6b075f23d68
+size 242070267

trainer_state.json CHANGED Viewed

@@ -2,42 +2,42 @@
   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 5.0,
-  "global_step": 1750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.43,
-      "learning_rate": 3.571428571428572e-05,
-      "loss": 0.8859,
       "step": 500
     },
     {
-      "epoch": 2.86,
-      "learning_rate": 2.1428571428571428e-05,
-      "loss": 0.0459,
       "step": 1000
     },
     {
-      "epoch": 4.29,
-      "learning_rate": 7.142857142857143e-06,
-      "loss": 0.0187,
       "step": 1500
     },
     {
       "epoch": 5.0,
-      "step": 1750,
-      "total_flos": 29806917058560.0,
-      "train_loss": 0.27371163518088204,
-      "train_runtime": 133.5222,
-      "train_samples_per_second": 52.426,
-      "train_steps_per_second": 13.106
     }
   ],
-  "max_steps": 1750,
   "num_train_epochs": 5,
-  "total_flos": 29806917058560.0,
   "trial_name": null,
   "trial_params": null
 }

   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 5.0,
+  "global_step": 1875,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 1.33,
+      "learning_rate": 3.6666666666666666e-05,
+      "loss": 0.8661,
       "step": 500
     },
     {
+      "epoch": 2.67,
+      "learning_rate": 2.3333333333333336e-05,
+      "loss": 0.0521,
       "step": 1000
     },
     {
+      "epoch": 4.0,
+      "learning_rate": 1e-05,
+      "loss": 0.0199,
       "step": 1500
     },
     {
       "epoch": 5.0,
+      "step": 1875,
+      "total_flos": 31746111307776.0,
+      "train_loss": 0.2530242416381836,
+      "train_runtime": 134.3349,
+      "train_samples_per_second": 55.831,
+      "train_steps_per_second": 13.958
     }
   ],
+  "max_steps": 1875,
   "num_train_epochs": 5,
+  "total_flos": 31746111307776.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:322e30da87d91285cc592fef6b8b680764c3d73dd2be1478a6c6fd789346423d
 size 2991

 version https://git-lfs.github.com/spec/v1
+oid sha256:897f546f75deb840b1810a2128356735036e00fd14c88e92914e4fa4c3e1ee32
 size 2991