End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +647 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_airoboros_x2
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # oh_v1.3_airoboros_x2
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7277

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_airoboros_x2
 # oh_v1.3_airoboros_x2
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1.3_airoboros_x2 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7277

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 2.9918991899189917,
+    "eval_loss": 0.7277409434318542,
+    "eval_runtime": 295.6731,
+    "eval_samples_per_second": 25.315,
+    "eval_steps_per_second": 0.396,
+    "total_flos": 1391746571304960.0,
+    "train_loss": 0.7029923594385278,
+    "train_runtime": 48819.4117,
+    "train_samples_per_second": 8.738,
+    "train_steps_per_second": 0.017
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 2.9918991899189917,
+    "eval_loss": 0.7277409434318542,
+    "eval_runtime": 295.6731,
+    "eval_samples_per_second": 25.315,
+    "eval_steps_per_second": 0.396
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.9918991899189917,
+    "total_flos": 1391746571304960.0,
+    "train_loss": 0.7029923594385278,
+    "train_runtime": 48819.4117,
+    "train_samples_per_second": 8.738,
+    "train_steps_per_second": 0.017
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,647 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.9918991899189917,
+  "eval_steps": 500,
+  "global_step": 831,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.036003600360036005,
+      "grad_norm": 8.28188267008514,
+      "learning_rate": 5e-06,
+      "loss": 1.0366,
+      "step": 10
+    },
+    {
+      "epoch": 0.07200720072007201,
+      "grad_norm": 2.372440318983394,
+      "learning_rate": 5e-06,
+      "loss": 0.911,
+      "step": 20
+    },
+    {
+      "epoch": 0.10801080108010801,
+      "grad_norm": 1.5430691151708913,
+      "learning_rate": 5e-06,
+      "loss": 0.8737,
+      "step": 30
+    },
+    {
+      "epoch": 0.14401440144014402,
+      "grad_norm": 1.6852866024979738,
+      "learning_rate": 5e-06,
+      "loss": 0.8448,
+      "step": 40
+    },
+    {
+      "epoch": 0.18001800180018002,
+      "grad_norm": 1.0422319477358,
+      "learning_rate": 5e-06,
+      "loss": 0.825,
+      "step": 50
+    },
+    {
+      "epoch": 0.21602160216021601,
+      "grad_norm": 0.9965056562001428,
+      "learning_rate": 5e-06,
+      "loss": 0.8107,
+      "step": 60
+    },
+    {
+      "epoch": 0.252025202520252,
+      "grad_norm": 0.7739884302751978,
+      "learning_rate": 5e-06,
+      "loss": 0.7958,
+      "step": 70
+    },
+    {
+      "epoch": 0.28802880288028804,
+      "grad_norm": 1.1803788120317176,
+      "learning_rate": 5e-06,
+      "loss": 0.7931,
+      "step": 80
+    },
+    {
+      "epoch": 0.324032403240324,
+      "grad_norm": 1.0324340981283762,
+      "learning_rate": 5e-06,
+      "loss": 0.7803,
+      "step": 90
+    },
+    {
+      "epoch": 0.36003600360036003,
+      "grad_norm": 0.9575288046763553,
+      "learning_rate": 5e-06,
+      "loss": 0.7766,
+      "step": 100
+    },
+    {
+      "epoch": 0.39603960396039606,
+      "grad_norm": 0.8374754661300629,
+      "learning_rate": 5e-06,
+      "loss": 0.7723,
+      "step": 110
+    },
+    {
+      "epoch": 0.43204320432043203,
+      "grad_norm": 0.7907545033736981,
+      "learning_rate": 5e-06,
+      "loss": 0.7647,
+      "step": 120
+    },
+    {
+      "epoch": 0.46804680468046805,
+      "grad_norm": 0.7676769317854443,
+      "learning_rate": 5e-06,
+      "loss": 0.7671,
+      "step": 130
+    },
+    {
+      "epoch": 0.504050405040504,
+      "grad_norm": 0.6051545961732036,
+      "learning_rate": 5e-06,
+      "loss": 0.7621,
+      "step": 140
+    },
+    {
+      "epoch": 0.54005400540054,
+      "grad_norm": 0.6744258472614542,
+      "learning_rate": 5e-06,
+      "loss": 0.7577,
+      "step": 150
+    },
+    {
+      "epoch": 0.5760576057605761,
+      "grad_norm": 0.7199869824208848,
+      "learning_rate": 5e-06,
+      "loss": 0.7576,
+      "step": 160
+    },
+    {
+      "epoch": 0.6120612061206121,
+      "grad_norm": 0.8152068316855611,
+      "learning_rate": 5e-06,
+      "loss": 0.7546,
+      "step": 170
+    },
+    {
+      "epoch": 0.648064806480648,
+      "grad_norm": 0.6341559903827244,
+      "learning_rate": 5e-06,
+      "loss": 0.756,
+      "step": 180
+    },
+    {
+      "epoch": 0.684068406840684,
+      "grad_norm": 0.6367964073514584,
+      "learning_rate": 5e-06,
+      "loss": 0.753,
+      "step": 190
+    },
+    {
+      "epoch": 0.7200720072007201,
+      "grad_norm": 0.647151007125146,
+      "learning_rate": 5e-06,
+      "loss": 0.7568,
+      "step": 200
+    },
+    {
+      "epoch": 0.7560756075607561,
+      "grad_norm": 0.7248014312732076,
+      "learning_rate": 5e-06,
+      "loss": 0.7512,
+      "step": 210
+    },
+    {
+      "epoch": 0.7920792079207921,
+      "grad_norm": 0.7677811996827786,
+      "learning_rate": 5e-06,
+      "loss": 0.7455,
+      "step": 220
+    },
+    {
+      "epoch": 0.828082808280828,
+      "grad_norm": 0.6716581315266134,
+      "learning_rate": 5e-06,
+      "loss": 0.7442,
+      "step": 230
+    },
+    {
+      "epoch": 0.8640864086408641,
+      "grad_norm": 0.6700748281170444,
+      "learning_rate": 5e-06,
+      "loss": 0.7463,
+      "step": 240
+    },
+    {
+      "epoch": 0.9000900090009001,
+      "grad_norm": 0.7119873555561343,
+      "learning_rate": 5e-06,
+      "loss": 0.7433,
+      "step": 250
+    },
+    {
+      "epoch": 0.9360936093609361,
+      "grad_norm": 0.7505227904025245,
+      "learning_rate": 5e-06,
+      "loss": 0.7417,
+      "step": 260
+    },
+    {
+      "epoch": 0.9720972097209721,
+      "grad_norm": 0.6278240730412481,
+      "learning_rate": 5e-06,
+      "loss": 0.7392,
+      "step": 270
+    },
+    {
+      "epoch": 0.9972997299729973,
+      "eval_loss": 0.7396969199180603,
+      "eval_runtime": 292.6851,
+      "eval_samples_per_second": 25.574,
+      "eval_steps_per_second": 0.4,
+      "step": 277
+    },
+    {
+      "epoch": 1.008100810081008,
+      "grad_norm": 1.160799237623972,
+      "learning_rate": 5e-06,
+      "loss": 0.7435,
+      "step": 280
+    },
+    {
+      "epoch": 1.0441044104410442,
+      "grad_norm": 0.8183376727189909,
+      "learning_rate": 5e-06,
+      "loss": 0.6915,
+      "step": 290
+    },
+    {
+      "epoch": 1.08010801080108,
+      "grad_norm": 0.8424246758574473,
+      "learning_rate": 5e-06,
+      "loss": 0.6946,
+      "step": 300
+    },
+    {
+      "epoch": 1.116111611161116,
+      "grad_norm": 0.7487195203197069,
+      "learning_rate": 5e-06,
+      "loss": 0.6902,
+      "step": 310
+    },
+    {
+      "epoch": 1.1521152115211521,
+      "grad_norm": 0.7322920833129869,
+      "learning_rate": 5e-06,
+      "loss": 0.6898,
+      "step": 320
+    },
+    {
+      "epoch": 1.188118811881188,
+      "grad_norm": 0.9645673518101977,
+      "learning_rate": 5e-06,
+      "loss": 0.6853,
+      "step": 330
+    },
+    {
+      "epoch": 1.2241224122412242,
+      "grad_norm": 1.4283577735373754,
+      "learning_rate": 5e-06,
+      "loss": 0.6863,
+      "step": 340
+    },
+    {
+      "epoch": 1.2601260126012601,
+      "grad_norm": 1.0313433086608226,
+      "learning_rate": 5e-06,
+      "loss": 0.689,
+      "step": 350
+    },
+    {
+      "epoch": 1.296129612961296,
+      "grad_norm": 0.6108529584385496,
+      "learning_rate": 5e-06,
+      "loss": 0.6872,
+      "step": 360
+    },
+    {
+      "epoch": 1.3321332133213322,
+      "grad_norm": 0.5789421153850809,
+      "learning_rate": 5e-06,
+      "loss": 0.6815,
+      "step": 370
+    },
+    {
+      "epoch": 1.368136813681368,
+      "grad_norm": 0.62526363928803,
+      "learning_rate": 5e-06,
+      "loss": 0.6871,
+      "step": 380
+    },
+    {
+      "epoch": 1.4041404140414042,
+      "grad_norm": 0.6010956062715849,
+      "learning_rate": 5e-06,
+      "loss": 0.6809,
+      "step": 390
+    },
+    {
+      "epoch": 1.4401440144014401,
+      "grad_norm": 0.7964635599244039,
+      "learning_rate": 5e-06,
+      "loss": 0.6885,
+      "step": 400
+    },
+    {
+      "epoch": 1.476147614761476,
+      "grad_norm": 0.5804300574827144,
+      "learning_rate": 5e-06,
+      "loss": 0.6851,
+      "step": 410
+    },
+    {
+      "epoch": 1.5121512151215122,
+      "grad_norm": 0.5856848362882421,
+      "learning_rate": 5e-06,
+      "loss": 0.6872,
+      "step": 420
+    },
+    {
+      "epoch": 1.5481548154815483,
+      "grad_norm": 0.6958357748276202,
+      "learning_rate": 5e-06,
+      "loss": 0.6872,
+      "step": 430
+    },
+    {
+      "epoch": 1.5841584158415842,
+      "grad_norm": 0.6768954740097681,
+      "learning_rate": 5e-06,
+      "loss": 0.6816,
+      "step": 440
+    },
+    {
+      "epoch": 1.6201620162016201,
+      "grad_norm": 0.6656001302473941,
+      "learning_rate": 5e-06,
+      "loss": 0.6852,
+      "step": 450
+    },
+    {
+      "epoch": 1.656165616561656,
+      "grad_norm": 0.5847297445098926,
+      "learning_rate": 5e-06,
+      "loss": 0.6837,
+      "step": 460
+    },
+    {
+      "epoch": 1.6921692169216922,
+      "grad_norm": 0.6677237827833459,
+      "learning_rate": 5e-06,
+      "loss": 0.6828,
+      "step": 470
+    },
+    {
+      "epoch": 1.7281728172817283,
+      "grad_norm": 0.6717288073606101,
+      "learning_rate": 5e-06,
+      "loss": 0.6892,
+      "step": 480
+    },
+    {
+      "epoch": 1.7641764176417642,
+      "grad_norm": 0.6026508515544339,
+      "learning_rate": 5e-06,
+      "loss": 0.6895,
+      "step": 490
+    },
+    {
+      "epoch": 1.8001800180018002,
+      "grad_norm": 0.7348397512725225,
+      "learning_rate": 5e-06,
+      "loss": 0.6888,
+      "step": 500
+    },
+    {
+      "epoch": 1.836183618361836,
+      "grad_norm": 0.6767034208815822,
+      "learning_rate": 5e-06,
+      "loss": 0.6822,
+      "step": 510
+    },
+    {
+      "epoch": 1.8721872187218722,
+      "grad_norm": 0.5868328971723439,
+      "learning_rate": 5e-06,
+      "loss": 0.6856,
+      "step": 520
+    },
+    {
+      "epoch": 1.9081908190819084,
+      "grad_norm": 0.6657122165386851,
+      "learning_rate": 5e-06,
+      "loss": 0.6858,
+      "step": 530
+    },
+    {
+      "epoch": 1.9441944194419443,
+      "grad_norm": 0.6843562239717784,
+      "learning_rate": 5e-06,
+      "loss": 0.6854,
+      "step": 540
+    },
+    {
+      "epoch": 1.9801980198019802,
+      "grad_norm": 0.6325820368205368,
+      "learning_rate": 5e-06,
+      "loss": 0.6894,
+      "step": 550
+    },
+    {
+      "epoch": 1.9981998199819984,
+      "eval_loss": 0.7263253331184387,
+      "eval_runtime": 294.6365,
+      "eval_samples_per_second": 25.404,
+      "eval_steps_per_second": 0.397,
+      "step": 555
+    },
+    {
+      "epoch": 2.016201620162016,
+      "grad_norm": 0.8901113520582664,
+      "learning_rate": 5e-06,
+      "loss": 0.6805,
+      "step": 560
+    },
+    {
+      "epoch": 2.052205220522052,
+      "grad_norm": 0.8312586311675406,
+      "learning_rate": 5e-06,
+      "loss": 0.6302,
+      "step": 570
+    },
+    {
+      "epoch": 2.0882088208820884,
+      "grad_norm": 0.645523505968572,
+      "learning_rate": 5e-06,
+      "loss": 0.6315,
+      "step": 580
+    },
+    {
+      "epoch": 2.1242124212421243,
+      "grad_norm": 0.6812801702191339,
+      "learning_rate": 5e-06,
+      "loss": 0.6301,
+      "step": 590
+    },
+    {
+      "epoch": 2.16021602160216,
+      "grad_norm": 0.6483786618034394,
+      "learning_rate": 5e-06,
+      "loss": 0.6298,
+      "step": 600
+    },
+    {
+      "epoch": 2.196219621962196,
+      "grad_norm": 0.6784530791335628,
+      "learning_rate": 5e-06,
+      "loss": 0.6354,
+      "step": 610
+    },
+    {
+      "epoch": 2.232223222322232,
+      "grad_norm": 0.60627028648818,
+      "learning_rate": 5e-06,
+      "loss": 0.6319,
+      "step": 620
+    },
+    {
+      "epoch": 2.2682268226822684,
+      "grad_norm": 0.6121470761934804,
+      "learning_rate": 5e-06,
+      "loss": 0.6336,
+      "step": 630
+    },
+    {
+      "epoch": 2.3042304230423043,
+      "grad_norm": 0.7236081633441965,
+      "learning_rate": 5e-06,
+      "loss": 0.6345,
+      "step": 640
+    },
+    {
+      "epoch": 2.34023402340234,
+      "grad_norm": 0.8548869591277164,
+      "learning_rate": 5e-06,
+      "loss": 0.6363,
+      "step": 650
+    },
+    {
+      "epoch": 2.376237623762376,
+      "grad_norm": 0.647183532105941,
+      "learning_rate": 5e-06,
+      "loss": 0.6313,
+      "step": 660
+    },
+    {
+      "epoch": 2.412241224122412,
+      "grad_norm": 0.6087053644736625,
+      "learning_rate": 5e-06,
+      "loss": 0.6357,
+      "step": 670
+    },
+    {
+      "epoch": 2.4482448244824484,
+      "grad_norm": 0.7533233767115554,
+      "learning_rate": 5e-06,
+      "loss": 0.6383,
+      "step": 680
+    },
+    {
+      "epoch": 2.4842484248424843,
+      "grad_norm": 0.8219213281281937,
+      "learning_rate": 5e-06,
+      "loss": 0.6376,
+      "step": 690
+    },
+    {
+      "epoch": 2.5202520252025202,
+      "grad_norm": 0.5918279562239513,
+      "learning_rate": 5e-06,
+      "loss": 0.6358,
+      "step": 700
+    },
+    {
+      "epoch": 2.556255625562556,
+      "grad_norm": 0.6812649937783365,
+      "learning_rate": 5e-06,
+      "loss": 0.6346,
+      "step": 710
+    },
+    {
+      "epoch": 2.592259225922592,
+      "grad_norm": 0.7062046537729157,
+      "learning_rate": 5e-06,
+      "loss": 0.6341,
+      "step": 720
+    },
+    {
+      "epoch": 2.6282628262826284,
+      "grad_norm": 0.6513878255971934,
+      "learning_rate": 5e-06,
+      "loss": 0.6345,
+      "step": 730
+    },
+    {
+      "epoch": 2.6642664266426643,
+      "grad_norm": 0.6543385256924704,
+      "learning_rate": 5e-06,
+      "loss": 0.6346,
+      "step": 740
+    },
+    {
+      "epoch": 2.7002700270027002,
+      "grad_norm": 0.737112200862458,
+      "learning_rate": 5e-06,
+      "loss": 0.6319,
+      "step": 750
+    },
+    {
+      "epoch": 2.736273627362736,
+      "grad_norm": 0.7994135143651142,
+      "learning_rate": 5e-06,
+      "loss": 0.636,
+      "step": 760
+    },
+    {
+      "epoch": 2.772277227722772,
+      "grad_norm": 0.6219351198221807,
+      "learning_rate": 5e-06,
+      "loss": 0.6426,
+      "step": 770
+    },
+    {
+      "epoch": 2.8082808280828084,
+      "grad_norm": 0.7158062619534662,
+      "learning_rate": 5e-06,
+      "loss": 0.632,
+      "step": 780
+    },
+    {
+      "epoch": 2.8442844284428443,
+      "grad_norm": 0.5601405956877966,
+      "learning_rate": 5e-06,
+      "loss": 0.6349,
+      "step": 790
+    },
+    {
+      "epoch": 2.8802880288028803,
+      "grad_norm": 0.5993882826107069,
+      "learning_rate": 5e-06,
+      "loss": 0.6382,
+      "step": 800
+    },
+    {
+      "epoch": 2.916291629162916,
+      "grad_norm": 0.5557179063729003,
+      "learning_rate": 5e-06,
+      "loss": 0.6362,
+      "step": 810
+    },
+    {
+      "epoch": 2.952295229522952,
+      "grad_norm": 0.5645061472428777,
+      "learning_rate": 5e-06,
+      "loss": 0.6357,
+      "step": 820
+    },
+    {
+      "epoch": 2.9882988298829884,
+      "grad_norm": 0.6301929405752535,
+      "learning_rate": 5e-06,
+      "loss": 0.6329,
+      "step": 830
+    },
+    {
+      "epoch": 2.9918991899189917,
+      "eval_loss": 0.7277409434318542,
+      "eval_runtime": 293.9341,
+      "eval_samples_per_second": 25.465,
+      "eval_steps_per_second": 0.398,
+      "step": 831
+    },
+    {
+      "epoch": 2.9918991899189917,
+      "step": 831,
+      "total_flos": 1391746571304960.0,
+      "train_loss": 0.7029923594385278,
+      "train_runtime": 48819.4117,
+      "train_samples_per_second": 8.738,
+      "train_steps_per_second": 0.017
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 831,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1391746571304960.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed