End of training

Browse files

Files changed (5) hide show

README.md +2 -1
all_results.json +8 -0
train_results.json +8 -0
trainer_state.json +693 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: apache-2.0
 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: 3k_globalbatchsize96_lr4e5_epochs3
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # 3k_globalbatchsize96_lr4e5_epochs3
-This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
 ## Model description

 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: 3k_globalbatchsize96_lr4e5_epochs3
 # 3k_globalbatchsize96_lr4e5_epochs3
+This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mlfoundations-dev/openthoughts_3000 dataset.
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.976,
+    "total_flos": 1.818538711009198e+17,
+    "train_loss": 0.5000655266546434,
+    "train_runtime": 9080.5183,
+    "train_samples_per_second": 0.99,
+    "train_steps_per_second": 0.01
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.976,
+    "total_flos": 1.818538711009198e+17,
+    "train_loss": 0.5000655266546434,
+    "train_runtime": 9080.5183,
+    "train_samples_per_second": 0.99,
+    "train_steps_per_second": 0.01
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,693 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.976,
+  "eval_steps": 500,
+  "global_step": 93,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.032,
+      "grad_norm": 6.08677457937853,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.8709,
+      "step": 1
+    },
+    {
+      "epoch": 0.064,
+      "grad_norm": 5.862503603501722,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.8625,
+      "step": 2
+    },
+    {
+      "epoch": 0.096,
+      "grad_norm": 4.5723036455815205,
+      "learning_rate": 1.2e-05,
+      "loss": 0.839,
+      "step": 3
+    },
+    {
+      "epoch": 0.128,
+      "grad_norm": 2.0695238404961547,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 0.7376,
+      "step": 4
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 5.589012282844458,
+      "learning_rate": 2e-05,
+      "loss": 0.825,
+      "step": 5
+    },
+    {
+      "epoch": 0.192,
+      "grad_norm": 8.490554966738163,
+      "learning_rate": 2.4e-05,
+      "loss": 0.8478,
+      "step": 6
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 6.29894861103046,
+      "learning_rate": 2.8e-05,
+      "loss": 0.7658,
+      "step": 7
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 3.558435531359445,
+      "learning_rate": 3.2000000000000005e-05,
+      "loss": 0.7555,
+      "step": 8
+    },
+    {
+      "epoch": 0.288,
+      "grad_norm": 2.722863714076688,
+      "learning_rate": 3.6e-05,
+      "loss": 0.7274,
+      "step": 9
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 2.088640250697761,
+      "learning_rate": 4e-05,
+      "loss": 0.6627,
+      "step": 10
+    },
+    {
+      "epoch": 0.352,
+      "grad_norm": 1.5096876602344986,
+      "learning_rate": 3.998567509632663e-05,
+      "loss": 0.6817,
+      "step": 11
+    },
+    {
+      "epoch": 0.384,
+      "grad_norm": 1.6896637753034105,
+      "learning_rate": 3.9942720905593045e-05,
+      "loss": 0.6761,
+      "step": 12
+    },
+    {
+      "epoch": 0.416,
+      "grad_norm": 1.5060644054718806,
+      "learning_rate": 3.98711989592637e-05,
+      "loss": 0.6519,
+      "step": 13
+    },
+    {
+      "epoch": 0.448,
+      "grad_norm": 1.4049366508424377,
+      "learning_rate": 3.9771211711837774e-05,
+      "loss": 0.6333,
+      "step": 14
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.411091031460123,
+      "learning_rate": 3.9642902394084056e-05,
+      "loss": 0.5874,
+      "step": 15
+    },
+    {
+      "epoch": 0.512,
+      "grad_norm": 0.8883192699998052,
+      "learning_rate": 3.948645480786427e-05,
+      "loss": 0.6116,
+      "step": 16
+    },
+    {
+      "epoch": 0.544,
+      "grad_norm": 1.129257283929009,
+      "learning_rate": 3.930209306283867e-05,
+      "loss": 0.5852,
+      "step": 17
+    },
+    {
+      "epoch": 0.576,
+      "grad_norm": 0.901306643045694,
+      "learning_rate": 3.909008125543111e-05,
+      "loss": 0.5821,
+      "step": 18
+    },
+    {
+      "epoch": 0.608,
+      "grad_norm": 0.9465933700636773,
+      "learning_rate": 3.885072309051346e-05,
+      "loss": 0.5833,
+      "step": 19
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.8046120689103757,
+      "learning_rate": 3.858436144635131e-05,
+      "loss": 0.5597,
+      "step": 20
+    },
+    {
+      "epoch": 0.672,
+      "grad_norm": 0.9228859026973337,
+      "learning_rate": 3.829137788343415e-05,
+      "loss": 0.5571,
+      "step": 21
+    },
+    {
+      "epoch": 0.704,
+      "grad_norm": 0.9202261747479373,
+      "learning_rate": 3.797219209789365e-05,
+      "loss": 0.6007,
+      "step": 22
+    },
+    {
+      "epoch": 0.736,
+      "grad_norm": 0.9336252436543613,
+      "learning_rate": 3.762726132029298e-05,
+      "loss": 0.5618,
+      "step": 23
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": 1.2080051557421407,
+      "learning_rate": 3.725707966064846e-05,
+      "loss": 0.6057,
+      "step": 24
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.1231601773572062,
+      "learning_rate": 3.686217740062169e-05,
+      "loss": 0.5603,
+      "step": 25
+    },
+    {
+      "epoch": 0.832,
+      "grad_norm": 0.8382981788712457,
+      "learning_rate": 3.644312023389621e-05,
+      "loss": 0.5491,
+      "step": 26
+    },
+    {
+      "epoch": 0.864,
+      "grad_norm": 1.1862939971134692,
+      "learning_rate": 3.600050845582669e-05,
+      "loss": 0.5887,
+      "step": 27
+    },
+    {
+      "epoch": 0.896,
+      "grad_norm": 1.0957634618366314,
+      "learning_rate": 3.5534976103521716e-05,
+      "loss": 0.5958,
+      "step": 28
+    },
+    {
+      "epoch": 0.928,
+      "grad_norm": 0.6590817382859444,
+      "learning_rate": 3.504719004759163e-05,
+      "loss": 0.5528,
+      "step": 29
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.875749033008583,
+      "learning_rate": 3.4537849036862874e-05,
+      "loss": 0.565,
+      "step": 30
+    },
+    {
+      "epoch": 0.992,
+      "grad_norm": 0.783563683071678,
+      "learning_rate": 3.400768269742702e-05,
+      "loss": 0.5595,
+      "step": 31
+    },
+    {
+      "epoch": 1.024,
+      "grad_norm": 1.4075572002103194,
+      "learning_rate": 3.345745048745838e-05,
+      "loss": 0.919,
+      "step": 32
+    },
+    {
+      "epoch": 1.056,
+      "grad_norm": 0.7062499705952857,
+      "learning_rate": 3.288794060929754e-05,
+      "loss": 0.4404,
+      "step": 33
+    },
+    {
+      "epoch": 1.088,
+      "grad_norm": 0.9967541412280615,
+      "learning_rate": 3.229996888035908e-05,
+      "loss": 0.4984,
+      "step": 34
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.8310704240859569,
+      "learning_rate": 3.169437756448095e-05,
+      "loss": 0.4807,
+      "step": 35
+    },
+    {
+      "epoch": 1.152,
+      "grad_norm": 0.7016314595292313,
+      "learning_rate": 3.107203416538969e-05,
+      "loss": 0.4703,
+      "step": 36
+    },
+    {
+      "epoch": 1.184,
+      "grad_norm": 1.4372935885534768,
+      "learning_rate": 3.0433830184009694e-05,
+      "loss": 0.4739,
+      "step": 37
+    },
+    {
+      "epoch": 1.216,
+      "grad_norm": 0.8650508524606009,
+      "learning_rate": 2.9780679841396668e-05,
+      "loss": 0.4525,
+      "step": 38
+    },
+    {
+      "epoch": 1.248,
+      "grad_norm": 1.0766618304625992,
+      "learning_rate": 2.9113518769124836e-05,
+      "loss": 0.4987,
+      "step": 39
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 0.6767070574855524,
+      "learning_rate": 2.843330266900368e-05,
+      "loss": 0.4475,
+      "step": 40
+    },
+    {
+      "epoch": 1.312,
+      "grad_norm": 1.026763400096595,
+      "learning_rate": 2.774100594404435e-05,
+      "loss": 0.4667,
+      "step": 41
+    },
+    {
+      "epoch": 1.3439999999999999,
+      "grad_norm": 0.9002290625499892,
+      "learning_rate": 2.703762030263666e-05,
+      "loss": 0.4916,
+      "step": 42
+    },
+    {
+      "epoch": 1.376,
+      "grad_norm": 0.9903797867735974,
+      "learning_rate": 2.632415333793648e-05,
+      "loss": 0.4771,
+      "step": 43
+    },
+    {
+      "epoch": 1.408,
+      "grad_norm": 0.7218070561744779,
+      "learning_rate": 2.5601627084498146e-05,
+      "loss": 0.407,
+      "step": 44
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 0.8313194407823631,
+      "learning_rate": 2.4871076554219838e-05,
+      "loss": 0.4442,
+      "step": 45
+    },
+    {
+      "epoch": 1.472,
+      "grad_norm": 0.8236730874850681,
+      "learning_rate": 2.413354825369906e-05,
+      "loss": 0.5223,
+      "step": 46
+    },
+    {
+      "epoch": 1.504,
+      "grad_norm": 0.6125019115754542,
+      "learning_rate": 2.3390098685121938e-05,
+      "loss": 0.42,
+      "step": 47
+    },
+    {
+      "epoch": 1.536,
+      "grad_norm": 0.6737099841054438,
+      "learning_rate": 2.264179283283405e-05,
+      "loss": 0.4665,
+      "step": 48
+    },
+    {
+      "epoch": 1.568,
+      "grad_norm": 0.5907056602966384,
+      "learning_rate": 2.1889702637760627e-05,
+      "loss": 0.4445,
+      "step": 49
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.6965345367451425,
+      "learning_rate": 2.1134905461861486e-05,
+      "loss": 0.5221,
+      "step": 50
+    },
+    {
+      "epoch": 1.6320000000000001,
+      "grad_norm": 0.5006105897500711,
+      "learning_rate": 2.0378482544820383e-05,
+      "loss": 0.4218,
+      "step": 51
+    },
+    {
+      "epoch": 1.6640000000000001,
+      "grad_norm": 0.6174830888739168,
+      "learning_rate": 1.9621517455179627e-05,
+      "loss": 0.476,
+      "step": 52
+    },
+    {
+      "epoch": 1.696,
+      "grad_norm": 0.503990909520708,
+      "learning_rate": 1.886509453813852e-05,
+      "loss": 0.4275,
+      "step": 53
+    },
+    {
+      "epoch": 1.728,
+      "grad_norm": 0.5974502021657286,
+      "learning_rate": 1.8110297362239376e-05,
+      "loss": 0.4757,
+      "step": 54
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 0.551329913445271,
+      "learning_rate": 1.735820716716596e-05,
+      "loss": 0.4757,
+      "step": 55
+    },
+    {
+      "epoch": 1.792,
+      "grad_norm": 0.579064058765967,
+      "learning_rate": 1.660990131487807e-05,
+      "loss": 0.4182,
+      "step": 56
+    },
+    {
+      "epoch": 1.8239999999999998,
+      "grad_norm": 0.548961385728647,
+      "learning_rate": 1.586645174630094e-05,
+      "loss": 0.4731,
+      "step": 57
+    },
+    {
+      "epoch": 1.8559999999999999,
+      "grad_norm": 0.5764590922933827,
+      "learning_rate": 1.5128923445780163e-05,
+      "loss": 0.4271,
+      "step": 58
+    },
+    {
+      "epoch": 1.888,
+      "grad_norm": 0.5781209646736115,
+      "learning_rate": 1.4398372915501862e-05,
+      "loss": 0.4644,
+      "step": 59
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.42862823912049036,
+      "learning_rate": 1.3675846662063521e-05,
+      "loss": 0.4071,
+      "step": 60
+    },
+    {
+      "epoch": 1.952,
+      "grad_norm": 0.569159061133135,
+      "learning_rate": 1.296237969736334e-05,
+      "loss": 0.4561,
+      "step": 61
+    },
+    {
+      "epoch": 1.984,
+      "grad_norm": 0.47530501617214926,
+      "learning_rate": 1.2258994055955658e-05,
+      "loss": 0.3817,
+      "step": 62
+    },
+    {
+      "epoch": 2.016,
+      "grad_norm": 0.9251054706846084,
+      "learning_rate": 1.156669733099632e-05,
+      "loss": 0.7898,
+      "step": 63
+    },
+    {
+      "epoch": 2.048,
+      "grad_norm": 0.5716376901076641,
+      "learning_rate": 1.0886481230875172e-05,
+      "loss": 0.3525,
+      "step": 64
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 0.5177936896448316,
+      "learning_rate": 1.0219320158603337e-05,
+      "loss": 0.3394,
+      "step": 65
+    },
+    {
+      "epoch": 2.112,
+      "grad_norm": 0.5247806458447061,
+      "learning_rate": 9.566169815990311e-06,
+      "loss": 0.3834,
+      "step": 66
+    },
+    {
+      "epoch": 2.144,
+      "grad_norm": 0.5455234154497576,
+      "learning_rate": 8.92796583461031e-06,
+      "loss": 0.3577,
+      "step": 67
+    },
+    {
+      "epoch": 2.176,
+      "grad_norm": 0.5925096096878631,
+      "learning_rate": 8.305622435519058e-06,
+      "loss": 0.3831,
+      "step": 68
+    },
+    {
+      "epoch": 2.208,
+      "grad_norm": 0.6820182428585542,
+      "learning_rate": 7.70003111964093e-06,
+      "loss": 0.376,
+      "step": 69
+    },
+    {
+      "epoch": 2.24,
+      "grad_norm": 0.6320620904769954,
+      "learning_rate": 7.112059390702459e-06,
+      "loss": 0.3715,
+      "step": 70
+    },
+    {
+      "epoch": 2.2720000000000002,
+      "grad_norm": 0.4928197929862798,
+      "learning_rate": 6.542549512541623e-06,
+      "loss": 0.3713,
+      "step": 71
+    },
+    {
+      "epoch": 2.304,
+      "grad_norm": 0.4767133735569691,
+      "learning_rate": 5.9923173025729895e-06,
+      "loss": 0.3303,
+      "step": 72
+    },
+    {
+      "epoch": 2.336,
+      "grad_norm": 0.7059312169326228,
+      "learning_rate": 5.462150963137125e-06,
+      "loss": 0.4568,
+      "step": 73
+    },
+    {
+      "epoch": 2.368,
+      "grad_norm": 0.5003954153261982,
+      "learning_rate": 4.952809952408375e-06,
+      "loss": 0.3514,
+      "step": 74
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.5944579046423205,
+      "learning_rate": 4.465023896478293e-06,
+      "loss": 0.3627,
+      "step": 75
+    },
+    {
+      "epoch": 2.432,
+      "grad_norm": 0.4180864471254852,
+      "learning_rate": 3.999491544173311e-06,
+      "loss": 0.3054,
+      "step": 76
+    },
+    {
+      "epoch": 2.464,
+      "grad_norm": 0.44019542396333683,
+      "learning_rate": 3.5568797661038004e-06,
+      "loss": 0.375,
+      "step": 77
+    },
+    {
+      "epoch": 2.496,
+      "grad_norm": 0.4294310051147678,
+      "learning_rate": 3.137822599378315e-06,
+      "loss": 0.3537,
+      "step": 78
+    },
+    {
+      "epoch": 2.528,
+      "grad_norm": 0.3888176996168452,
+      "learning_rate": 2.7429203393515426e-06,
+      "loss": 0.378,
+      "step": 79
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 0.3576643131385393,
+      "learning_rate": 2.372738679707023e-06,
+      "loss": 0.3232,
+      "step": 80
+    },
+    {
+      "epoch": 2.592,
+      "grad_norm": 0.3696160994931973,
+      "learning_rate": 2.02780790210636e-06,
+      "loss": 0.3542,
+      "step": 81
+    },
+    {
+      "epoch": 2.624,
+      "grad_norm": 0.36882809964571234,
+      "learning_rate": 1.7086221165658544e-06,
+      "loss": 0.351,
+      "step": 82
+    },
+    {
+      "epoch": 2.656,
+      "grad_norm": 0.34603236345776744,
+      "learning_rate": 1.4156385536486973e-06,
+      "loss": 0.3212,
+      "step": 83
+    },
+    {
+      "epoch": 2.6879999999999997,
+      "grad_norm": 0.3805023899734686,
+      "learning_rate": 1.1492769094865475e-06,
+      "loss": 0.3744,
+      "step": 84
+    },
+    {
+      "epoch": 2.7199999999999998,
+      "grad_norm": 0.3005431184449355,
+      "learning_rate": 9.099187445688984e-07,
+      "loss": 0.3071,
+      "step": 85
+    },
+    {
+      "epoch": 2.752,
+      "grad_norm": 0.3625697661026582,
+      "learning_rate": 6.979069371613345e-07,
+      "loss": 0.3755,
+      "step": 86
+    },
+    {
+      "epoch": 2.784,
+      "grad_norm": 0.30922554074419895,
+      "learning_rate": 5.135451921357337e-07,
+      "loss": 0.2993,
+      "step": 87
+    },
+    {
+      "epoch": 2.816,
+      "grad_norm": 0.3209396414531254,
+      "learning_rate": 3.570976059159481e-07,
+      "loss": 0.3725,
+      "step": 88
+    },
+    {
+      "epoch": 2.848,
+      "grad_norm": 0.3188890164441534,
+      "learning_rate": 2.2878828816222942e-07,
+      "loss": 0.3691,
+      "step": 89
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 0.30765746035254077,
+      "learning_rate": 1.2880104073630163e-07,
+      "loss": 0.3218,
+      "step": 90
+    },
+    {
+      "epoch": 2.912,
+      "grad_norm": 0.3085708159717203,
+      "learning_rate": 5.7279094406959e-08,
+      "loss": 0.3625,
+      "step": 91
+    },
+    {
+      "epoch": 2.944,
+      "grad_norm": 0.3092205424287526,
+      "learning_rate": 1.4324903673370583e-08,
+      "loss": 0.3543,
+      "step": 92
+    },
+    {
+      "epoch": 2.976,
+      "grad_norm": 0.3121521435612877,
+      "learning_rate": 0.0,
+      "loss": 0.3917,
+      "step": 93
+    },
+    {
+      "epoch": 2.976,
+      "step": 93,
+      "total_flos": 1.818538711009198e+17,
+      "train_loss": 0.5000655266546434,
+      "train_runtime": 9080.5183,
+      "train_samples_per_second": 0.99,
+      "train_steps_per_second": 0.01
+    }
+  ],
+  "logging_steps": 1.0,
+  "max_steps": 93,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.818538711009198e+17,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_loss.png ADDED Viewed