Model save

Files changed (11) hide show

README.md CHANGED Viewed

@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [facebook/opt-6.7b](https://huggingface.co/facebook/opt-6.7b) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.4492
 ## Model description
@@ -48,14 +48,14 @@ The following hyperparameters were used during training:
 - total_eval_batch_size: 512
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- training_steps: 20
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.8817        | 0.08  | 20   | 1.4492          |
 ### Framework versions

 This model is a fine-tuned version of [facebook/opt-6.7b](https://huggingface.co/facebook/opt-6.7b) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.2324
 ## Model description
 - total_eval_batch_size: 512
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
+- num_epochs: 1.0
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.8817        | 1.0   | 253  | 1.2324          |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 0.08,
-    "eval_loss": 1.44921875,
-    "eval_runtime": 79.3699,
     "eval_samples": 23110,
-    "eval_samples_per_second": 179.904,
-    "eval_steps_per_second": 0.353,
-    "train_loss": 1.8745559692382812,
-    "train_runtime": 266.5177,
     "train_samples": 207865,
-    "train_samples_per_second": 38.421,
-    "train_steps_per_second": 0.075
 }

 {
+    "epoch": 1.0,
+    "eval_loss": 1.232421875,
+    "eval_runtime": 77.6546,
     "eval_samples": 23110,
+    "eval_samples_per_second": 183.878,
+    "eval_steps_per_second": 0.361,
+    "train_loss": 1.3124533189615242,
+    "train_runtime": 2403.4192,
     "train_samples": 207865,
+    "train_samples_per_second": 53.713,
+    "train_steps_per_second": 0.105
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.08,
-    "eval_loss": 1.44921875,
-    "eval_runtime": 79.3699,
     "eval_samples": 23110,
-    "eval_samples_per_second": 179.904,
-    "eval_steps_per_second": 0.353
 }

 {
+    "epoch": 1.0,
+    "eval_loss": 1.232421875,
+    "eval_runtime": 77.6546,
     "eval_samples": 23110,
+    "eval_samples_per_second": 183.878,
+    "eval_steps_per_second": 0.361
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65a238c8eb52abdad746179f691016350d45345a2bd1bf5c1a065ab1f2a0c603
 size 4993283928

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c4289f04d4edc6c9315e9815ea32582470bc39aa823b7d35e28ae54a35cdba4
 size 4993283928

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e2ae75313cd9c65c7b2a39d4c0919924c95434c0f304d74177ea54327f9e9db
 size 4967389600

 version https://git-lfs.github.com/spec/v1
+oid sha256:e52c3e8e8c71be2c0f021aa4333e23add6b2d48f1876364ed2706d848d099ddd
 size 4967389600

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf36073a57afed545b43c124b7287ba64a9b93035a5d3baba829fece44d33404
 size 3356335328

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca916dd5bca30e94bb8358e1838ad53fefa61680ba94e8a2ac4e02ff262e18b7
 size 3356335328

runs/Jan03_19-09-43_aga39/events.out.tfevents.1704330610.aga39.146871.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4434bcdf73670fc87228b21722263b172730b7ef996306524302b7d9f6f8cfed
+size 5261

runs/Jan03_19-09-43_aga39/events.out.tfevents.1704333092.aga39.146871.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3dac238c4d2f4e5beef93d30f9d9a18f2ed6034e3a005b680ec561eecb56121f
+size 359

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.08,
-    "train_loss": 1.8745559692382812,
-    "train_runtime": 266.5177,
     "train_samples": 207865,
-    "train_samples_per_second": 38.421,
-    "train_steps_per_second": 0.075
 }

 {
+    "epoch": 1.0,
+    "train_loss": 1.3124533189615242,
+    "train_runtime": 2403.4192,
     "train_samples": 207865,
+    "train_samples_per_second": 53.713,
+    "train_steps_per_second": 0.105
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07905138339920949,
   "eval_steps": 500,
-  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -15,29 +15,29 @@
       "step": 1
     },
     {
-      "epoch": 0.08,
-      "eval_loss": 1.44921875,
-      "eval_runtime": 79.4742,
-      "eval_samples_per_second": 179.668,
-      "eval_steps_per_second": 0.352,
-      "step": 20
     },
     {
-      "epoch": 0.08,
-      "step": 20,
-      "total_flos": 32315333935104.0,
-      "train_loss": 1.8745559692382812,
-      "train_runtime": 266.5177,
-      "train_samples_per_second": 38.421,
-      "train_steps_per_second": 0.075
     }
   ],
   "logging_steps": 500,
-  "max_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
-  "total_flos": 32315333935104.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 253,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 1
     },
     {
+      "epoch": 1.0,
+      "eval_loss": 1.232421875,
+      "eval_runtime": 77.7676,
+      "eval_samples_per_second": 183.611,
+      "eval_steps_per_second": 0.36,
+      "step": 253
     },
     {
+      "epoch": 1.0,
+      "step": 253,
+      "total_flos": 2542139602894848.0,
+      "train_loss": 1.3124533189615242,
+      "train_runtime": 2403.4192,
+      "train_samples_per_second": 53.713,
+      "train_steps_per_second": 0.105
     }
   ],
   "logging_steps": 500,
+  "max_steps": 253,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
+  "total_flos": 2542139602894848.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:402b64c314fb1d2ccb1804f480a62b3e5ef5e1207665ac66ee405e3f0121a314
 size 5307

 version https://git-lfs.github.com/spec/v1
+oid sha256:078ba5ed03c0d0b0273bc2394994d489301db1cb89ad2153078ce344f565055e
 size 5307