End of training

Browse files

Files changed (5) hide show

README.md +2 -1
all_results.json +8 -0
train_results.json +8 -0
trainer_state.json +252 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: apache-2.0
 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: 1k_globalbatchsize96_lr2e5_epochs3
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # 1k_globalbatchsize96_lr2e5_epochs3
-This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
 ## Model description

 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: 1k_globalbatchsize96_lr2e5_epochs3
 # 1k_globalbatchsize96_lr2e5_epochs3
+This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mlfoundations-dev/openthoughts_1000 dataset.
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.88,
+    "total_flos": 5.991907970357658e+16,
+    "train_loss": 0.6625967750946681,
+    "train_runtime": 1875.2615,
+    "train_samples_per_second": 1.593,
+    "train_steps_per_second": 0.016
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.88,
+    "total_flos": 5.991907970357658e+16,
+    "train_loss": 0.6625967750946681,
+    "train_runtime": 1875.2615,
+    "train_samples_per_second": 1.593,
+    "train_steps_per_second": 0.016
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,252 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.88,
+  "eval_steps": 500,
+  "global_step": 30,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.096,
+      "grad_norm": 5.814563827025826,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 0.8387,
+      "step": 1
+    },
+    {
+      "epoch": 0.192,
+      "grad_norm": 6.057426975785317,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 0.8858,
+      "step": 2
+    },
+    {
+      "epoch": 0.288,
+      "grad_norm": 4.007064490746166,
+      "learning_rate": 2e-05,
+      "loss": 0.8113,
+      "step": 3
+    },
+    {
+      "epoch": 0.384,
+      "grad_norm": 5.004951417256041,
+      "learning_rate": 1.9932383577419432e-05,
+      "loss": 0.8554,
+      "step": 4
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 6.727222736455591,
+      "learning_rate": 1.973044870579824e-05,
+      "loss": 0.8286,
+      "step": 5
+    },
+    {
+      "epoch": 0.576,
+      "grad_norm": 4.803589667848796,
+      "learning_rate": 1.9396926207859085e-05,
+      "loss": 0.8309,
+      "step": 6
+    },
+    {
+      "epoch": 0.672,
+      "grad_norm": 2.590001327760376,
+      "learning_rate": 1.8936326403234125e-05,
+      "loss": 0.7385,
+      "step": 7
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": 1.6220642820910374,
+      "learning_rate": 1.8354878114129368e-05,
+      "loss": 0.7294,
+      "step": 8
+    },
+    {
+      "epoch": 0.864,
+      "grad_norm": 1.8619806140864155,
+      "learning_rate": 1.766044443118978e-05,
+      "loss": 0.7205,
+      "step": 9
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 1.3645335588645835,
+      "learning_rate": 1.686241637868734e-05,
+      "loss": 0.6953,
+      "step": 10
+    },
+    {
+      "epoch": 1.056,
+      "grad_norm": 2.076186087877376,
+      "learning_rate": 1.5971585917027864e-05,
+      "loss": 0.9725,
+      "step": 11
+    },
+    {
+      "epoch": 1.152,
+      "grad_norm": 0.9149130285668169,
+      "learning_rate": 1.5000000000000002e-05,
+      "loss": 0.6101,
+      "step": 12
+    },
+    {
+      "epoch": 1.248,
+      "grad_norm": 0.705898368962313,
+      "learning_rate": 1.396079766039157e-05,
+      "loss": 0.64,
+      "step": 13
+    },
+    {
+      "epoch": 1.3439999999999999,
+      "grad_norm": 0.7552499556606495,
+      "learning_rate": 1.2868032327110904e-05,
+      "loss": 0.6146,
+      "step": 14
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 0.6896062568680769,
+      "learning_rate": 1.1736481776669307e-05,
+      "loss": 0.5999,
+      "step": 15
+    },
+    {
+      "epoch": 1.536,
+      "grad_norm": 0.6423500551888411,
+      "learning_rate": 1.0581448289104759e-05,
+      "loss": 0.5669,
+      "step": 16
+    },
+    {
+      "epoch": 1.6320000000000001,
+      "grad_norm": 0.5780881539904756,
+      "learning_rate": 9.418551710895243e-06,
+      "loss": 0.6061,
+      "step": 17
+    },
+    {
+      "epoch": 1.728,
+      "grad_norm": 0.6234266417806577,
+      "learning_rate": 8.263518223330698e-06,
+      "loss": 0.5558,
+      "step": 18
+    },
+    {
+      "epoch": 1.8239999999999998,
+      "grad_norm": 0.6553666620141552,
+      "learning_rate": 7.131967672889101e-06,
+      "loss": 0.5946,
+      "step": 19
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.47741308815028444,
+      "learning_rate": 6.039202339608432e-06,
+      "loss": 0.5277,
+      "step": 20
+    },
+    {
+      "epoch": 2.016,
+      "grad_norm": 0.7213163625238077,
+      "learning_rate": 5.000000000000003e-06,
+      "loss": 0.8965,
+      "step": 21
+    },
+    {
+      "epoch": 2.112,
+      "grad_norm": 0.47174704404054646,
+      "learning_rate": 4.028414082972141e-06,
+      "loss": 0.5342,
+      "step": 22
+    },
+    {
+      "epoch": 2.208,
+      "grad_norm": 0.42136979721580436,
+      "learning_rate": 3.1375836213126653e-06,
+      "loss": 0.5077,
+      "step": 23
+    },
+    {
+      "epoch": 2.304,
+      "grad_norm": 0.4495453930556151,
+      "learning_rate": 2.339555568810221e-06,
+      "loss": 0.5606,
+      "step": 24
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.3819003862342479,
+      "learning_rate": 1.6451218858706374e-06,
+      "loss": 0.4947,
+      "step": 25
+    },
+    {
+      "epoch": 2.496,
+      "grad_norm": 0.4322444996917962,
+      "learning_rate": 1.0636735967658785e-06,
+      "loss": 0.5458,
+      "step": 26
+    },
+    {
+      "epoch": 2.592,
+      "grad_norm": 0.39916696244837735,
+      "learning_rate": 6.030737921409169e-07,
+      "loss": 0.5589,
+      "step": 27
+    },
+    {
+      "epoch": 2.6879999999999997,
+      "grad_norm": 0.3590112191092515,
+      "learning_rate": 2.6955129420176193e-07,
+      "loss": 0.5143,
+      "step": 28
+    },
+    {
+      "epoch": 2.784,
+      "grad_norm": 0.3623303942069575,
+      "learning_rate": 6.761642258056977e-08,
+      "loss": 0.4956,
+      "step": 29
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 0.37519040519361285,
+      "learning_rate": 0.0,
+      "loss": 0.5471,
+      "step": 30
+    },
+    {
+      "epoch": 2.88,
+      "step": 30,
+      "total_flos": 5.991907970357658e+16,
+      "train_loss": 0.6625967750946681,
+      "train_runtime": 1875.2615,
+      "train_samples_per_second": 1.593,
+      "train_steps_per_second": 0.016
+    }
+  ],
+  "logging_steps": 1.0,
+  "max_steps": 30,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.991907970357658e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_loss.png ADDED Viewed