End of training

- README.md +2 -1
- all_results.json +12 -0
- eval_results.json +7 -0
- train_results.json +8 -0
- trainer_state.json +640 -0
- training_eval_loss.png +0 -0
- training_loss.png +0 -0
README.md
CHANGED

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_camel_math_x.25
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # oh_v1.3_camel_math_x.25
 
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1.3_camel_math_x.25 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7211
 
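The updated README describes a full fine-tune of Meta-Llama-3.1-8B, so the checkpoint should load like any other causal-LM repo on the Hub. A minimal sketch, assuming the model is published under an id such as `mlfoundations-dev/oh_v1.3_camel_math_x.25` (the exact repo id is not stated in this diff; substitute the real Hub id or a local path):

```python
# Minimal sketch: load the fine-tuned checkpoint with transformers.
# The repo id below is an assumption, not confirmed by this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mlfoundations-dev/oh_v1.3_camel_math_x.25"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")

prompt = "Solve: what is 12 * 17?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```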
all_results.json
ADDED

{
    "epoch": 2.995899772209567,
    "eval_loss": 0.7211272120475769,
    "eval_runtime": 291.637,
    "eval_samples_per_second": 25.347,
    "eval_steps_per_second": 0.398,
    "total_flos": 1376671236096000.0,
    "train_loss": 0.6964337152866261,
    "train_runtime": 48380.1477,
    "train_samples_per_second": 8.709,
    "train_steps_per_second": 0.017
}
eval_results.json
ADDED

{
    "epoch": 2.995899772209567,
    "eval_loss": 0.7211272120475769,
    "eval_runtime": 291.637,
    "eval_samples_per_second": 25.347,
    "eval_steps_per_second": 0.398
}
train_results.json
ADDED

{
    "epoch": 2.995899772209567,
    "total_flos": 1376671236096000.0,
    "train_loss": 0.6964337152866261,
    "train_runtime": 48380.1477,
    "train_samples_per_second": 8.709,
    "train_steps_per_second": 0.017
}
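The three result files above share a flat key/value layout, so the headline metrics can be checked directly once the repo is cloned. A minimal sketch (paths assume a local checkout of this repo):

```python
# Minimal sketch: print the final metrics from the flat result files
# written at the end of training (paths assume a local clone).
import json
from pathlib import Path

for name in ["all_results.json", "eval_results.json", "train_results.json"]:
    path = Path(name)
    if not path.exists():
        continue
    results = json.loads(path.read_text())
    print(f"--- {name} ---")
    for key, value in results.items():
        print(f"{key}: {value}")

# Expected final eval loss per eval_results.json: ~0.7211
```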
trainer_state.json
ADDED

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.995899772209567,
  "eval_steps": 500,
  "global_step": 822,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03644646924829157,
      "grad_norm": 13.273536409408566,
      "learning_rate": 5e-06,
      "loss": 1.0215,
      "step": 10
    },
    {
      "epoch": 0.07289293849658314,
      "grad_norm": 2.0256934340222124,
      "learning_rate": 5e-06,
      "loss": 0.901,
      "step": 20
    },
    {
      "epoch": 0.10933940774487472,
      "grad_norm": 1.1110745496419128,
      "learning_rate": 5e-06,
      "loss": 0.8577,
      "step": 30
    },
    {
      "epoch": 0.14578587699316628,
      "grad_norm": 1.2500399664289437,
      "learning_rate": 5e-06,
      "loss": 0.8319,
      "step": 40
    },
    {
      "epoch": 0.18223234624145787,
      "grad_norm": 0.993614371192878,
      "learning_rate": 5e-06,
      "loss": 0.8081,
      "step": 50
    },
    {
      "epoch": 0.21867881548974943,
      "grad_norm": 0.97363008151794,
      "learning_rate": 5e-06,
      "loss": 0.7963,
      "step": 60
    },
    {
      "epoch": 0.255125284738041,
      "grad_norm": 0.9129099469883597,
      "learning_rate": 5e-06,
      "loss": 0.7888,
      "step": 70
    },
    {
      "epoch": 0.29157175398633256,
      "grad_norm": 0.9344193654869811,
      "learning_rate": 5e-06,
      "loss": 0.7758,
      "step": 80
    },
    {
      "epoch": 0.32801822323462415,
      "grad_norm": 1.0188210165751896,
      "learning_rate": 5e-06,
      "loss": 0.775,
      "step": 90
    },
    {
      "epoch": 0.36446469248291574,
      "grad_norm": 0.9232347440747942,
      "learning_rate": 5e-06,
      "loss": 0.7692,
      "step": 100
    },
    {
      "epoch": 0.4009111617312073,
      "grad_norm": 0.6783140292643164,
      "learning_rate": 5e-06,
      "loss": 0.7621,
      "step": 110
    },
    {
      "epoch": 0.43735763097949887,
      "grad_norm": 0.6184031681315292,
      "learning_rate": 5e-06,
      "loss": 0.7556,
      "step": 120
    },
    {
      "epoch": 0.47380410022779046,
      "grad_norm": 0.5469349607429109,
      "learning_rate": 5e-06,
      "loss": 0.7551,
      "step": 130
    },
    {
      "epoch": 0.510250569476082,
      "grad_norm": 0.8877445271536203,
      "learning_rate": 5e-06,
      "loss": 0.7554,
      "step": 140
    },
    {
      "epoch": 0.5466970387243736,
      "grad_norm": 1.0291306012367956,
      "learning_rate": 5e-06,
      "loss": 0.7495,
      "step": 150
    },
    {
      "epoch": 0.5831435079726651,
      "grad_norm": 0.6607692936776239,
      "learning_rate": 5e-06,
      "loss": 0.7459,
      "step": 160
    },
    {
      "epoch": 0.6195899772209568,
      "grad_norm": 0.6336927673267501,
      "learning_rate": 5e-06,
      "loss": 0.748,
      "step": 170
    },
    {
      "epoch": 0.6560364464692483,
      "grad_norm": 0.8944901762240539,
      "learning_rate": 5e-06,
      "loss": 0.7452,
      "step": 180
    },
    {
      "epoch": 0.6924829157175398,
      "grad_norm": 0.8843897330408937,
      "learning_rate": 5e-06,
      "loss": 0.7445,
      "step": 190
    },
    {
      "epoch": 0.7289293849658315,
      "grad_norm": 0.6972008986400734,
      "learning_rate": 5e-06,
      "loss": 0.7385,
      "step": 200
    },
    {
      "epoch": 0.765375854214123,
      "grad_norm": 0.5894622066220608,
      "learning_rate": 5e-06,
      "loss": 0.7375,
      "step": 210
    },
    {
      "epoch": 0.8018223234624146,
      "grad_norm": 0.5755055010916849,
      "learning_rate": 5e-06,
      "loss": 0.7366,
      "step": 220
    },
    {
      "epoch": 0.8382687927107062,
      "grad_norm": 0.6956357933104967,
      "learning_rate": 5e-06,
      "loss": 0.7418,
      "step": 230
    },
    {
      "epoch": 0.8747152619589977,
      "grad_norm": 0.7222014717098794,
      "learning_rate": 5e-06,
      "loss": 0.7289,
      "step": 240
    },
    {
      "epoch": 0.9111617312072893,
      "grad_norm": 0.6509301599056833,
      "learning_rate": 5e-06,
      "loss": 0.7353,
      "step": 250
    },
    {
      "epoch": 0.9476082004555809,
      "grad_norm": 0.5946415096003963,
      "learning_rate": 5e-06,
      "loss": 0.739,
      "step": 260
    },
    {
      "epoch": 0.9840546697038725,
      "grad_norm": 0.6399983053851734,
      "learning_rate": 5e-06,
      "loss": 0.7315,
      "step": 270
    },
    {
      "epoch": 0.9986332574031891,
      "eval_loss": 0.7315455079078674,
      "eval_runtime": 290.5947,
      "eval_samples_per_second": 25.437,
      "eval_steps_per_second": 0.399,
      "step": 274
    },
    {
      "epoch": 1.020501138952164,
      "grad_norm": 0.8395093526707935,
      "learning_rate": 5e-06,
      "loss": 0.747,
      "step": 280
    },
    {
      "epoch": 1.0569476082004556,
      "grad_norm": 0.9199729633560787,
      "learning_rate": 5e-06,
      "loss": 0.6787,
      "step": 290
    },
    {
      "epoch": 1.0933940774487472,
      "grad_norm": 0.6628677057491944,
      "learning_rate": 5e-06,
      "loss": 0.6791,
      "step": 300
    },
    {
      "epoch": 1.1298405466970387,
      "grad_norm": 0.6614989831751948,
      "learning_rate": 5e-06,
      "loss": 0.6854,
      "step": 310
    },
    {
      "epoch": 1.1662870159453302,
      "grad_norm": 0.6964522514874895,
      "learning_rate": 5e-06,
      "loss": 0.6763,
      "step": 320
    },
    {
      "epoch": 1.2027334851936218,
      "grad_norm": 0.7090306269606215,
      "learning_rate": 5e-06,
      "loss": 0.6753,
      "step": 330
    },
    {
      "epoch": 1.2391799544419135,
      "grad_norm": 0.648532712130652,
      "learning_rate": 5e-06,
      "loss": 0.68,
      "step": 340
    },
    {
      "epoch": 1.275626423690205,
      "grad_norm": 0.7822954196339824,
      "learning_rate": 5e-06,
      "loss": 0.6817,
      "step": 350
    },
    {
      "epoch": 1.3120728929384966,
      "grad_norm": 0.6766423459315555,
      "learning_rate": 5e-06,
      "loss": 0.6803,
      "step": 360
    },
    {
      "epoch": 1.3485193621867881,
      "grad_norm": 0.7731309625470634,
      "learning_rate": 5e-06,
      "loss": 0.6788,
      "step": 370
    },
    {
      "epoch": 1.3849658314350797,
      "grad_norm": 0.6229285700860081,
      "learning_rate": 5e-06,
      "loss": 0.6856,
      "step": 380
    },
    {
      "epoch": 1.4214123006833712,
      "grad_norm": 0.6927410350677501,
      "learning_rate": 5e-06,
      "loss": 0.6808,
      "step": 390
    },
    {
      "epoch": 1.4578587699316627,
      "grad_norm": 0.834486739783265,
      "learning_rate": 5e-06,
      "loss": 0.6772,
      "step": 400
    },
    {
      "epoch": 1.4943052391799545,
      "grad_norm": 0.7099676513539387,
      "learning_rate": 5e-06,
      "loss": 0.6803,
      "step": 410
    },
    {
      "epoch": 1.530751708428246,
      "grad_norm": 0.6104516289365347,
      "learning_rate": 5e-06,
      "loss": 0.683,
      "step": 420
    },
    {
      "epoch": 1.5671981776765376,
      "grad_norm": 0.5971848121166693,
      "learning_rate": 5e-06,
      "loss": 0.6787,
      "step": 430
    },
    {
      "epoch": 1.603644646924829,
      "grad_norm": 0.6649414637192727,
      "learning_rate": 5e-06,
      "loss": 0.6849,
      "step": 440
    },
    {
      "epoch": 1.6400911161731209,
      "grad_norm": 0.7320907085872882,
      "learning_rate": 5e-06,
      "loss": 0.6817,
      "step": 450
    },
    {
      "epoch": 1.6765375854214124,
      "grad_norm": 0.5705453457499549,
      "learning_rate": 5e-06,
      "loss": 0.6836,
      "step": 460
    },
    {
      "epoch": 1.712984054669704,
      "grad_norm": 0.6288020854363963,
      "learning_rate": 5e-06,
      "loss": 0.6788,
      "step": 470
    },
    {
      "epoch": 1.7494305239179955,
      "grad_norm": 0.5726327402033801,
      "learning_rate": 5e-06,
      "loss": 0.6808,
      "step": 480
    },
    {
      "epoch": 1.785876993166287,
      "grad_norm": 0.5173548522448698,
      "learning_rate": 5e-06,
      "loss": 0.6799,
      "step": 490
    },
    {
      "epoch": 1.8223234624145785,
      "grad_norm": 0.5790339638087626,
      "learning_rate": 5e-06,
      "loss": 0.6747,
      "step": 500
    },
    {
      "epoch": 1.85876993166287,
      "grad_norm": 0.6584239869836397,
      "learning_rate": 5e-06,
      "loss": 0.677,
      "step": 510
    },
    {
      "epoch": 1.8952164009111616,
      "grad_norm": 0.5311060458966043,
      "learning_rate": 5e-06,
      "loss": 0.6854,
      "step": 520
    },
    {
      "epoch": 1.9316628701594531,
      "grad_norm": 0.6512560331845895,
      "learning_rate": 5e-06,
      "loss": 0.6784,
      "step": 530
    },
    {
      "epoch": 1.968109339407745,
      "grad_norm": 0.584794911589519,
      "learning_rate": 5e-06,
      "loss": 0.6805,
      "step": 540
    },
    {
      "epoch": 1.9972665148063782,
      "eval_loss": 0.7187947630882263,
      "eval_runtime": 291.938,
      "eval_samples_per_second": 25.32,
      "eval_steps_per_second": 0.397,
      "step": 548
    },
    {
      "epoch": 2.0045558086560367,
      "grad_norm": 0.885264185792607,
      "learning_rate": 5e-06,
      "loss": 0.7193,
      "step": 550
    },
    {
      "epoch": 2.041002277904328,
      "grad_norm": 0.7654881044126012,
      "learning_rate": 5e-06,
      "loss": 0.6233,
      "step": 560
    },
    {
      "epoch": 2.0774487471526197,
      "grad_norm": 0.6151945001911823,
      "learning_rate": 5e-06,
      "loss": 0.6283,
      "step": 570
    },
    {
      "epoch": 2.1138952164009113,
      "grad_norm": 0.8374365216828517,
      "learning_rate": 5e-06,
      "loss": 0.6207,
      "step": 580
    },
    {
      "epoch": 2.150341685649203,
      "grad_norm": 0.7031485699411321,
      "learning_rate": 5e-06,
      "loss": 0.6244,
      "step": 590
    },
    {
      "epoch": 2.1867881548974943,
      "grad_norm": 0.8633299586157721,
      "learning_rate": 5e-06,
      "loss": 0.6258,
      "step": 600
    },
    {
      "epoch": 2.223234624145786,
      "grad_norm": 0.7458455143129973,
      "learning_rate": 5e-06,
      "loss": 0.628,
      "step": 610
    },
    {
      "epoch": 2.2596810933940774,
      "grad_norm": 1.0109682737601884,
      "learning_rate": 5e-06,
      "loss": 0.6264,
      "step": 620
    },
    {
      "epoch": 2.296127562642369,
      "grad_norm": 0.6211087319945944,
      "learning_rate": 5e-06,
      "loss": 0.6242,
      "step": 630
    },
    {
      "epoch": 2.3325740318906605,
      "grad_norm": 0.7036393621519607,
      "learning_rate": 5e-06,
      "loss": 0.6305,
      "step": 640
    },
    {
      "epoch": 2.369020501138952,
      "grad_norm": 0.6058323256112293,
      "learning_rate": 5e-06,
      "loss": 0.6297,
      "step": 650
    },
    {
      "epoch": 2.4054669703872436,
      "grad_norm": 0.6608686938446593,
      "learning_rate": 5e-06,
      "loss": 0.631,
      "step": 660
    },
    {
      "epoch": 2.4419134396355355,
      "grad_norm": 0.6199043838308076,
      "learning_rate": 5e-06,
      "loss": 0.6264,
      "step": 670
    },
    {
      "epoch": 2.478359908883827,
      "grad_norm": 0.607673754805363,
      "learning_rate": 5e-06,
      "loss": 0.6266,
      "step": 680
    },
    {
      "epoch": 2.5148063781321186,
      "grad_norm": 0.9277091135129097,
      "learning_rate": 5e-06,
      "loss": 0.6263,
      "step": 690
    },
    {
      "epoch": 2.55125284738041,
      "grad_norm": 0.9381891238069503,
      "learning_rate": 5e-06,
      "loss": 0.6317,
      "step": 700
    },
    {
      "epoch": 2.5876993166287017,
      "grad_norm": 0.6592786383334494,
      "learning_rate": 5e-06,
      "loss": 0.6264,
      "step": 710
    },
    {
      "epoch": 2.624145785876993,
      "grad_norm": 0.7421181566721138,
      "learning_rate": 5e-06,
      "loss": 0.6295,
      "step": 720
    },
    {
      "epoch": 2.6605922551252847,
      "grad_norm": 0.6781081672896357,
      "learning_rate": 5e-06,
      "loss": 0.6273,
      "step": 730
    },
    {
      "epoch": 2.6970387243735763,
      "grad_norm": 0.609137054982541,
      "learning_rate": 5e-06,
      "loss": 0.6328,
      "step": 740
    },
    {
      "epoch": 2.733485193621868,
      "grad_norm": 0.6919361244155826,
      "learning_rate": 5e-06,
      "loss": 0.6333,
      "step": 750
    },
    {
      "epoch": 2.7699316628701594,
      "grad_norm": 0.6379259386020866,
      "learning_rate": 5e-06,
      "loss": 0.6306,
      "step": 760
    },
    {
      "epoch": 2.806378132118451,
      "grad_norm": 0.6035608731746878,
      "learning_rate": 5e-06,
      "loss": 0.6338,
      "step": 770
    },
    {
      "epoch": 2.8428246013667424,
      "grad_norm": 0.7325417971133363,
      "learning_rate": 5e-06,
      "loss": 0.6352,
      "step": 780
    },
    {
      "epoch": 2.879271070615034,
      "grad_norm": 0.8532590605538493,
      "learning_rate": 5e-06,
      "loss": 0.6284,
      "step": 790
    },
    {
      "epoch": 2.9157175398633255,
      "grad_norm": 0.6185281977585761,
      "learning_rate": 5e-06,
      "loss": 0.6306,
      "step": 800
    },
    {
      "epoch": 2.9521640091116175,
      "grad_norm": 0.6806046770942457,
      "learning_rate": 5e-06,
      "loss": 0.6402,
      "step": 810
    },
    {
      "epoch": 2.988610478359909,
      "grad_norm": 0.6802410015239903,
      "learning_rate": 5e-06,
      "loss": 0.6348,
      "step": 820
    },
    {
      "epoch": 2.995899772209567,
      "eval_loss": 0.7211272120475769,
      "eval_runtime": 291.822,
      "eval_samples_per_second": 25.331,
      "eval_steps_per_second": 0.398,
      "step": 822
    },
    {
      "epoch": 2.995899772209567,
      "step": 822,
      "total_flos": 1376671236096000.0,
      "train_loss": 0.6964337152866261,
      "train_runtime": 48380.1477,
      "train_samples_per_second": 8.709,
      "train_steps_per_second": 0.017
    }
  ],
  "logging_steps": 10,
  "max_steps": 822,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1376671236096000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
training_eval_loss.png
ADDED

training_loss.png
ADDED
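The two PNGs track the loss curves recorded in trainer_state.json. A minimal sketch of how similar plots could be regenerated from log_history, assuming a local copy of the file and matplotlib (this is not necessarily the script that produced the committed images):

```python
# Minimal sketch: rebuild training/eval loss curves from trainer_state.json.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Training log entries carry "loss"; evaluation entries carry "eval_loss".
train_steps = [e["step"] for e in state["log_history"] if "loss" in e]
train_loss = [e["loss"] for e in state["log_history"] if "loss" in e]
eval_steps = [e["step"] for e in state["log_history"] if "eval_loss" in e]
eval_loss = [e["eval_loss"] for e in state["log_history"] if "eval_loss" in e]

plt.plot(train_steps, train_loss, label="train loss")
plt.plot(eval_steps, eval_loss, marker="o", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("training_loss_reconstructed.png")
```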