Upload 14 files

Browse files

Files changed (7) hide show

latest +1 -1
pytorch_model.bin +2 -2
rng_state_0.pth +1 -1
rng_state_1.pth +1 -1
rng_state_2.pth +1 -1
rng_state_3.pth +1 -1
trainer_state.json +3 -162

latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step2000~~


1	+ global_step8000

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66fff2a6bbfe0a0077fb9179b3d1441781be24e39a82a6c8106c3cdea18d6ec
-size 123569645

 version https://git-lfs.github.com/spec/v1
+oid sha256:d990b51ce673b6fd2abc89e19c8de3396176bd4b7629d4afe284e263eeb25b50
+size 23657822141

rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e75c96f06b249e57a701db73ce821398e69672027a86d3a44063830602a29ab4
 size 14583

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e3c5cb412e12159a59afe5657ce4b5e0a06e7fb420bedbb5228fe1245702762
 size 14583

rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6dae7f45b6bac644ac207a61f43cba6d4b919a4cac22022bbb02907914422f5d
 size 14583

 version https://git-lfs.github.com/spec/v1
+oid sha256:741230672078323886b763e522c728741456a587860909fc529ce815a7aca5ec
 size 14583

rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e53c770fe48635faad7fa341007d771781f1397cd47daab5b58f879ffb65f178
 size 14583

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ea587886b41579993bb5d20c79047b968ae2d71d22ba4c739b07ce31d7486a6
 size 14583

rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68692af1001e65d02e07ac9974ccf4c332cfb23bc8f89566e1a908b1f2c4a1ed
 size 14583

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ab727740f74dd67e60283d27b4339609a1dda888b067cc06520e2f1d7dc17db
 size 14583

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.9992992291520673,
-  "global_step": 8025,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -48006,170 +48006,11 @@
       "learning_rate": 2e-05,
       "loss": 0.4429,
       "step": 8000
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.5603,
-      "step": 8001
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.5119,
-      "step": 8002
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.3574,
-      "step": 8003
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.4055,
-      "step": 8004
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.6877,
-      "step": 8005
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.3634,
-      "step": 8006
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.4054,
-      "step": 8007
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.3723,
-      "step": 8008
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.4081,
-      "step": 8009
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.4419,
-      "step": 8010
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.6377,
-      "step": 8011
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.5082,
-      "step": 8012
-    },
-    {
-      "epoch": 2.99,
-      "learning_rate": 2e-05,
-      "loss": 0.5274,
-      "step": 8013
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.4954,
-      "step": 8014
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.617,
-      "step": 8015
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.4943,
-      "step": 8016
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.3116,
-      "step": 8017
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.4602,
-      "step": 8018
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.4009,
-      "step": 8019
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.4631,
-      "step": 8020
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.3465,
-      "step": 8021
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.6339,
-      "step": 8022
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.3831,
-      "step": 8023
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.4769,
-      "step": 8024
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 2e-05,
-      "loss": 0.5142,
-      "step": 8025
-    },
-    {
-      "epoch": 3.0,
-      "step": 8025,
-      "total_flos": 1027959387144192.0,
-      "train_loss": 1.0478906600497593,
-      "train_runtime": 551322.7584,
-      "train_samples_per_second": 0.466,
-      "train_steps_per_second": 0.015
     }
   ],
   "max_steps": 8025,
   "num_train_epochs": 3,
-  "total_flos": 1027959387144192.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.989955617846298,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2e-05,
       "loss": 0.4429,
       "step": 8000
     }
   ],
   "max_steps": 8025,
   "num_train_epochs": 3,
+  "total_flos": 1024787635126272.0,
   "trial_name": null,
   "trial_params": null
 }