End of training

Files changed (5) hide show

README.md CHANGED Viewed

@@ -2,11 +2,24 @@
 library_name: transformers
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
 model-index:
 - name: T5LA
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
 [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/uoy/llm_training/runs/pzcq293g)
 # T5LA
-This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Accuracy: 0.0322
 - Loss: 5.5470
 ## Model description

 library_name: transformers
 tags:
 - generated_from_trainer
+datasets:
+- HuggingFaceFW/fineweb
 metrics:
 - accuracy
 model-index:
 - name: T5LA
+  results:
+  - task:
+      name: Causal Language Modeling
+      type: text-generation
+    dataset:
+      name: HuggingFaceFW/fineweb sample-10BT
+      type: HuggingFaceFW/fineweb
+      args: sample-10BT
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.0322300343763811
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/uoy/llm_training/runs/pzcq293g)
 # T5LA
+This model is a fine-tuned version of [](https://huggingface.co/) on the HuggingFaceFW/fineweb sample-10BT dataset.
 It achieves the following results on the evaluation set:
 - Loss: 5.5470
+- Accuracy: 0.0322
 ## Model description

all_results.json CHANGED Viewed

@@ -1,16 +1,16 @@
 {
-    "epoch": 1.00001,
-    "eval_accuracy": 0.03222989830774154,
-    "eval_loss": 5.5469770431518555,
-    "eval_runtime": 110.5546,
     "eval_samples": 10000,
-    "eval_samples_per_second": 32.491,
-    "eval_steps_per_second": 2.035,
-    "perplexity": 256.46111204397334,
-    "total_flos": 9.182126159167488e+17,
-    "train_loss": 5.625401986489168e-05,
-    "train_runtime": 26.8473,
     "train_samples": 1000000,
-    "train_samples_per_second": 59596.412,
-    "train_steps_per_second": 3724.776
 }

 {
+    "epoch": 2.1069,
+    "eval_accuracy": 0.0322300343763811,
+    "eval_loss": 5.546974182128906,
+    "eval_runtime": 116.7315,
     "eval_samples": 10000,
+    "eval_samples_per_second": 30.771,
+    "eval_steps_per_second": 1.927,
+    "perplexity": 256.4603783038958,
+    "total_flos": 9.182034338135409e+17,
+    "train_loss": 0.0,
+    "train_runtime": 866.5464,
     "train_samples": 1000000,
+    "train_samples_per_second": 3692.82,
+    "train_steps_per_second": 230.801
 }

eval_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-    "epoch": 1.00001,
-    "eval_accuracy": 0.03222989830774154,
-    "eval_loss": 5.5469770431518555,
-    "eval_runtime": 110.5546,
     "eval_samples": 10000,
-    "eval_samples_per_second": 32.491,
-    "eval_steps_per_second": 2.035,
-    "perplexity": 256.46111204397334
 }

 {
+    "epoch": 2.1069,
+    "eval_accuracy": 0.0322300343763811,
+    "eval_loss": 5.546974182128906,
+    "eval_runtime": 116.7315,
     "eval_samples": 10000,
+    "eval_samples_per_second": 30.771,
+    "eval_steps_per_second": 1.927,
+    "perplexity": 256.4603783038958
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 1.00001,
-    "total_flos": 9.182126159167488e+17,
-    "train_loss": 5.625401986489168e-05,
-    "train_runtime": 26.8473,
     "train_samples": 1000000,
-    "train_samples_per_second": 59596.412,
-    "train_steps_per_second": 3724.776
 }

 {
+    "epoch": 2.1069,
+    "total_flos": 9.182034338135409e+17,
+    "train_loss": 0.0,
+    "train_runtime": 866.5464,
     "train_samples": 1000000,
+    "train_samples_per_second": 3692.82,
+    "train_steps_per_second": 230.801
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 5.546974182128906,
   "best_model_checkpoint": "/users/hr1171/scratch/T5LA/checkpoint-100000",
-  "epoch": 1.00001,
   "eval_steps": 1000,
-  "global_step": 100001,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2309,17 +2309,17 @@
       "step": 100000
     },
     {
-      "epoch": 1.00001,
-      "step": 100001,
-      "total_flos": 9.182126159167488e+17,
-      "train_loss": 5.625401986489168e-05,
-      "train_runtime": 26.8473,
-      "train_samples_per_second": 59596.412,
-      "train_steps_per_second": 3724.776
     }
   ],
   "logging_steps": 500,
-  "max_steps": 100000,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 9223372036854775807,
   "save_steps": 500,
@@ -2335,7 +2335,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.182126159167488e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 5.546974182128906,
   "best_model_checkpoint": "/users/hr1171/scratch/T5LA/checkpoint-100000",
+  "epoch": 2.1069,
   "eval_steps": 1000,
+  "global_step": 100000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 100000
     },
     {
+      "epoch": 2.1069,
+      "step": 100000,
+      "total_flos": 9.182034338135409e+17,
+      "train_loss": 0.0,
+      "train_runtime": 866.5464,
+      "train_samples_per_second": 3692.82,
+      "train_steps_per_second": 230.801
     }
   ],
   "logging_steps": 500,
+  "max_steps": 200000,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 9223372036854775807,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 9.182034338135409e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null