End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +675 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_camel_math_x2
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # oh_v1.3_camel_math_x2
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7127

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_camel_math_x2
 # oh_v1.3_camel_math_x2
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1.3_camel_math_x2 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7127

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 2.9948364888123926,
+    "eval_loss": 0.7126539349555969,
+    "eval_runtime": 310.8733,
+    "eval_samples_per_second": 25.181,
+    "eval_steps_per_second": 0.396,
+    "total_flos": 1457073023877120.0,
+    "train_loss": 0.6931865083760229,
+    "train_runtime": 51465.7493,
+    "train_samples_per_second": 8.669,
+    "train_steps_per_second": 0.017
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 2.9948364888123926,
+    "eval_loss": 0.7126539349555969,
+    "eval_runtime": 310.8733,
+    "eval_samples_per_second": 25.181,
+    "eval_steps_per_second": 0.396
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.9948364888123926,
+    "total_flos": 1457073023877120.0,
+    "train_loss": 0.6931865083760229,
+    "train_runtime": 51465.7493,
+    "train_samples_per_second": 8.669,
+    "train_steps_per_second": 0.017
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,675 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.9948364888123926,
+  "eval_steps": 500,
+  "global_step": 870,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03442340791738382,
+      "grad_norm": 3.6819635678440528,
+      "learning_rate": 5e-06,
+      "loss": 1.0121,
+      "step": 10
+    },
+    {
+      "epoch": 0.06884681583476764,
+      "grad_norm": 5.925274879357907,
+      "learning_rate": 5e-06,
+      "loss": 0.8959,
+      "step": 20
+    },
+    {
+      "epoch": 0.10327022375215146,
+      "grad_norm": 1.9452350190286591,
+      "learning_rate": 5e-06,
+      "loss": 0.877,
+      "step": 30
+    },
+    {
+      "epoch": 0.13769363166953527,
+      "grad_norm": 1.180696985048175,
+      "learning_rate": 5e-06,
+      "loss": 0.8454,
+      "step": 40
+    },
+    {
+      "epoch": 0.1721170395869191,
+      "grad_norm": 0.9223333997999451,
+      "learning_rate": 5e-06,
+      "loss": 0.8155,
+      "step": 50
+    },
+    {
+      "epoch": 0.20654044750430292,
+      "grad_norm": 0.9307430421223478,
+      "learning_rate": 5e-06,
+      "loss": 0.7988,
+      "step": 60
+    },
+    {
+      "epoch": 0.24096385542168675,
+      "grad_norm": 0.791390062841487,
+      "learning_rate": 5e-06,
+      "loss": 0.7889,
+      "step": 70
+    },
+    {
+      "epoch": 0.27538726333907054,
+      "grad_norm": 0.6218724507386446,
+      "learning_rate": 5e-06,
+      "loss": 0.7806,
+      "step": 80
+    },
+    {
+      "epoch": 0.3098106712564544,
+      "grad_norm": 0.7264494941987614,
+      "learning_rate": 5e-06,
+      "loss": 0.7705,
+      "step": 90
+    },
+    {
+      "epoch": 0.3442340791738382,
+      "grad_norm": 0.6924488359878446,
+      "learning_rate": 5e-06,
+      "loss": 0.7613,
+      "step": 100
+    },
+    {
+      "epoch": 0.37865748709122204,
+      "grad_norm": 0.8803235860035055,
+      "learning_rate": 5e-06,
+      "loss": 0.761,
+      "step": 110
+    },
+    {
+      "epoch": 0.41308089500860584,
+      "grad_norm": 0.6841880149421407,
+      "learning_rate": 5e-06,
+      "loss": 0.7555,
+      "step": 120
+    },
+    {
+      "epoch": 0.4475043029259897,
+      "grad_norm": 0.667216275591224,
+      "learning_rate": 5e-06,
+      "loss": 0.7507,
+      "step": 130
+    },
+    {
+      "epoch": 0.4819277108433735,
+      "grad_norm": 0.6640712451607944,
+      "learning_rate": 5e-06,
+      "loss": 0.7474,
+      "step": 140
+    },
+    {
+      "epoch": 0.5163511187607573,
+      "grad_norm": 0.5645446586749623,
+      "learning_rate": 5e-06,
+      "loss": 0.7452,
+      "step": 150
+    },
+    {
+      "epoch": 0.5507745266781411,
+      "grad_norm": 0.7052276498344197,
+      "learning_rate": 5e-06,
+      "loss": 0.7461,
+      "step": 160
+    },
+    {
+      "epoch": 0.5851979345955249,
+      "grad_norm": 0.7026553647920556,
+      "learning_rate": 5e-06,
+      "loss": 0.7467,
+      "step": 170
+    },
+    {
+      "epoch": 0.6196213425129088,
+      "grad_norm": 0.5956245872933223,
+      "learning_rate": 5e-06,
+      "loss": 0.7483,
+      "step": 180
+    },
+    {
+      "epoch": 0.6540447504302926,
+      "grad_norm": 0.5945615019725103,
+      "learning_rate": 5e-06,
+      "loss": 0.7424,
+      "step": 190
+    },
+    {
+      "epoch": 0.6884681583476764,
+      "grad_norm": 0.5926282356688969,
+      "learning_rate": 5e-06,
+      "loss": 0.7393,
+      "step": 200
+    },
+    {
+      "epoch": 0.7228915662650602,
+      "grad_norm": 0.529261150364574,
+      "learning_rate": 5e-06,
+      "loss": 0.734,
+      "step": 210
+    },
+    {
+      "epoch": 0.7573149741824441,
+      "grad_norm": 0.70244337869977,
+      "learning_rate": 5e-06,
+      "loss": 0.7344,
+      "step": 220
+    },
+    {
+      "epoch": 0.7917383820998278,
+      "grad_norm": 0.5208460618800276,
+      "learning_rate": 5e-06,
+      "loss": 0.7332,
+      "step": 230
+    },
+    {
+      "epoch": 0.8261617900172117,
+      "grad_norm": 0.6781034767797038,
+      "learning_rate": 5e-06,
+      "loss": 0.7323,
+      "step": 240
+    },
+    {
+      "epoch": 0.8605851979345955,
+      "grad_norm": 0.6364816529741125,
+      "learning_rate": 5e-06,
+      "loss": 0.7311,
+      "step": 250
+    },
+    {
+      "epoch": 0.8950086058519794,
+      "grad_norm": 0.6857634900258707,
+      "learning_rate": 5e-06,
+      "loss": 0.73,
+      "step": 260
+    },
+    {
+      "epoch": 0.9294320137693631,
+      "grad_norm": 0.7533537266440626,
+      "learning_rate": 5e-06,
+      "loss": 0.7274,
+      "step": 270
+    },
+    {
+      "epoch": 0.963855421686747,
+      "grad_norm": 0.668483865607749,
+      "learning_rate": 5e-06,
+      "loss": 0.7281,
+      "step": 280
+    },
+    {
+      "epoch": 0.9982788296041308,
+      "grad_norm": 0.640731101658078,
+      "learning_rate": 5e-06,
+      "loss": 0.73,
+      "step": 290
+    },
+    {
+      "epoch": 0.9982788296041308,
+      "eval_loss": 0.7240723967552185,
+      "eval_runtime": 311.5856,
+      "eval_samples_per_second": 25.123,
+      "eval_steps_per_second": 0.395,
+      "step": 290
+    },
+    {
+      "epoch": 1.0327022375215147,
+      "grad_norm": 0.8232801616508892,
+      "learning_rate": 5e-06,
+      "loss": 0.7164,
+      "step": 300
+    },
+    {
+      "epoch": 1.0671256454388984,
+      "grad_norm": 0.6877231272007057,
+      "learning_rate": 5e-06,
+      "loss": 0.6796,
+      "step": 310
+    },
+    {
+      "epoch": 1.1015490533562822,
+      "grad_norm": 0.7867017260974334,
+      "learning_rate": 5e-06,
+      "loss": 0.6731,
+      "step": 320
+    },
+    {
+      "epoch": 1.1359724612736661,
+      "grad_norm": 0.6102991857765998,
+      "learning_rate": 5e-06,
+      "loss": 0.6804,
+      "step": 330
+    },
+    {
+      "epoch": 1.1703958691910499,
+      "grad_norm": 0.7250816197036796,
+      "learning_rate": 5e-06,
+      "loss": 0.676,
+      "step": 340
+    },
+    {
+      "epoch": 1.2048192771084336,
+      "grad_norm": 0.6971293258638788,
+      "learning_rate": 5e-06,
+      "loss": 0.6758,
+      "step": 350
+    },
+    {
+      "epoch": 1.2392426850258176,
+      "grad_norm": 0.6980055976515607,
+      "learning_rate": 5e-06,
+      "loss": 0.6812,
+      "step": 360
+    },
+    {
+      "epoch": 1.2736660929432013,
+      "grad_norm": 0.6257924181521026,
+      "learning_rate": 5e-06,
+      "loss": 0.6829,
+      "step": 370
+    },
+    {
+      "epoch": 1.3080895008605853,
+      "grad_norm": 0.6620444223829324,
+      "learning_rate": 5e-06,
+      "loss": 0.6787,
+      "step": 380
+    },
+    {
+      "epoch": 1.342512908777969,
+      "grad_norm": 0.6019054885784155,
+      "learning_rate": 5e-06,
+      "loss": 0.6793,
+      "step": 390
+    },
+    {
+      "epoch": 1.3769363166953528,
+      "grad_norm": 0.6430051610733118,
+      "learning_rate": 5e-06,
+      "loss": 0.6774,
+      "step": 400
+    },
+    {
+      "epoch": 1.4113597246127367,
+      "grad_norm": 0.5807368932507306,
+      "learning_rate": 5e-06,
+      "loss": 0.6812,
+      "step": 410
+    },
+    {
+      "epoch": 1.4457831325301205,
+      "grad_norm": 0.6470925978408152,
+      "learning_rate": 5e-06,
+      "loss": 0.6747,
+      "step": 420
+    },
+    {
+      "epoch": 1.4802065404475044,
+      "grad_norm": 0.6423216146537339,
+      "learning_rate": 5e-06,
+      "loss": 0.6764,
+      "step": 430
+    },
+    {
+      "epoch": 1.5146299483648882,
+      "grad_norm": 0.5134608684735672,
+      "learning_rate": 5e-06,
+      "loss": 0.6744,
+      "step": 440
+    },
+    {
+      "epoch": 1.549053356282272,
+      "grad_norm": 0.5563124728753217,
+      "learning_rate": 5e-06,
+      "loss": 0.6728,
+      "step": 450
+    },
+    {
+      "epoch": 1.5834767641996557,
+      "grad_norm": 0.6269436233978866,
+      "learning_rate": 5e-06,
+      "loss": 0.6761,
+      "step": 460
+    },
+    {
+      "epoch": 1.6179001721170396,
+      "grad_norm": 0.589734978264397,
+      "learning_rate": 5e-06,
+      "loss": 0.6792,
+      "step": 470
+    },
+    {
+      "epoch": 1.6523235800344234,
+      "grad_norm": 0.6327759222361318,
+      "learning_rate": 5e-06,
+      "loss": 0.6768,
+      "step": 480
+    },
+    {
+      "epoch": 1.6867469879518073,
+      "grad_norm": 0.6962103362892431,
+      "learning_rate": 5e-06,
+      "loss": 0.677,
+      "step": 490
+    },
+    {
+      "epoch": 1.721170395869191,
+      "grad_norm": 0.5760289071453567,
+      "learning_rate": 5e-06,
+      "loss": 0.6799,
+      "step": 500
+    },
+    {
+      "epoch": 1.7555938037865748,
+      "grad_norm": 0.6442600102377914,
+      "learning_rate": 5e-06,
+      "loss": 0.6773,
+      "step": 510
+    },
+    {
+      "epoch": 1.7900172117039586,
+      "grad_norm": 0.7715377748849698,
+      "learning_rate": 5e-06,
+      "loss": 0.6761,
+      "step": 520
+    },
+    {
+      "epoch": 1.8244406196213425,
+      "grad_norm": 0.5533000553027299,
+      "learning_rate": 5e-06,
+      "loss": 0.6736,
+      "step": 530
+    },
+    {
+      "epoch": 1.8588640275387265,
+      "grad_norm": 0.6543045883003663,
+      "learning_rate": 5e-06,
+      "loss": 0.6724,
+      "step": 540
+    },
+    {
+      "epoch": 1.8932874354561102,
+      "grad_norm": 0.7812179906299692,
+      "learning_rate": 5e-06,
+      "loss": 0.6745,
+      "step": 550
+    },
+    {
+      "epoch": 1.927710843373494,
+      "grad_norm": 0.7706494630311692,
+      "learning_rate": 5e-06,
+      "loss": 0.6744,
+      "step": 560
+    },
+    {
+      "epoch": 1.9621342512908777,
+      "grad_norm": 0.6182434646754749,
+      "learning_rate": 5e-06,
+      "loss": 0.6755,
+      "step": 570
+    },
+    {
+      "epoch": 1.9965576592082617,
+      "grad_norm": 0.6295557645635617,
+      "learning_rate": 5e-06,
+      "loss": 0.6787,
+      "step": 580
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7113586664199829,
+      "eval_runtime": 311.8588,
+      "eval_samples_per_second": 25.101,
+      "eval_steps_per_second": 0.394,
+      "step": 581
+    },
+    {
+      "epoch": 2.0309810671256456,
+      "grad_norm": 1.2061368930293643,
+      "learning_rate": 5e-06,
+      "loss": 0.6659,
+      "step": 590
+    },
+    {
+      "epoch": 2.0654044750430294,
+      "grad_norm": 1.3280429163631766,
+      "learning_rate": 5e-06,
+      "loss": 0.6232,
+      "step": 600
+    },
+    {
+      "epoch": 2.099827882960413,
+      "grad_norm": 0.8615634723401497,
+      "learning_rate": 5e-06,
+      "loss": 0.6239,
+      "step": 610
+    },
+    {
+      "epoch": 2.134251290877797,
+      "grad_norm": 0.7137137740055365,
+      "learning_rate": 5e-06,
+      "loss": 0.6196,
+      "step": 620
+    },
+    {
+      "epoch": 2.1686746987951806,
+      "grad_norm": 0.7012119673623688,
+      "learning_rate": 5e-06,
+      "loss": 0.6257,
+      "step": 630
+    },
+    {
+      "epoch": 2.2030981067125643,
+      "grad_norm": 0.7539553553577881,
+      "learning_rate": 5e-06,
+      "loss": 0.6232,
+      "step": 640
+    },
+    {
+      "epoch": 2.2375215146299485,
+      "grad_norm": 0.7635231238603634,
+      "learning_rate": 5e-06,
+      "loss": 0.6203,
+      "step": 650
+    },
+    {
+      "epoch": 2.2719449225473323,
+      "grad_norm": 0.6908410296367468,
+      "learning_rate": 5e-06,
+      "loss": 0.6254,
+      "step": 660
+    },
+    {
+      "epoch": 2.306368330464716,
+      "grad_norm": 0.6587745940287006,
+      "learning_rate": 5e-06,
+      "loss": 0.6301,
+      "step": 670
+    },
+    {
+      "epoch": 2.3407917383820998,
+      "grad_norm": 0.5798868468674587,
+      "learning_rate": 5e-06,
+      "loss": 0.6279,
+      "step": 680
+    },
+    {
+      "epoch": 2.3752151462994835,
+      "grad_norm": 0.8440728118550425,
+      "learning_rate": 5e-06,
+      "loss": 0.6287,
+      "step": 690
+    },
+    {
+      "epoch": 2.4096385542168672,
+      "grad_norm": 0.6066489275997706,
+      "learning_rate": 5e-06,
+      "loss": 0.6295,
+      "step": 700
+    },
+    {
+      "epoch": 2.4440619621342514,
+      "grad_norm": 0.7165812340817078,
+      "learning_rate": 5e-06,
+      "loss": 0.6277,
+      "step": 710
+    },
+    {
+      "epoch": 2.478485370051635,
+      "grad_norm": 0.6122168594678861,
+      "learning_rate": 5e-06,
+      "loss": 0.6293,
+      "step": 720
+    },
+    {
+      "epoch": 2.512908777969019,
+      "grad_norm": 0.81573767147419,
+      "learning_rate": 5e-06,
+      "loss": 0.6287,
+      "step": 730
+    },
+    {
+      "epoch": 2.5473321858864026,
+      "grad_norm": 0.5606648215554753,
+      "learning_rate": 5e-06,
+      "loss": 0.6294,
+      "step": 740
+    },
+    {
+      "epoch": 2.581755593803787,
+      "grad_norm": 0.6406975384981994,
+      "learning_rate": 5e-06,
+      "loss": 0.6232,
+      "step": 750
+    },
+    {
+      "epoch": 2.6161790017211706,
+      "grad_norm": 0.6856546267607884,
+      "learning_rate": 5e-06,
+      "loss": 0.6291,
+      "step": 760
+    },
+    {
+      "epoch": 2.6506024096385543,
+      "grad_norm": 0.6347450877099359,
+      "learning_rate": 5e-06,
+      "loss": 0.6283,
+      "step": 770
+    },
+    {
+      "epoch": 2.685025817555938,
+      "grad_norm": 0.6621950677045059,
+      "learning_rate": 5e-06,
+      "loss": 0.6305,
+      "step": 780
+    },
+    {
+      "epoch": 2.719449225473322,
+      "grad_norm": 0.6174426541448764,
+      "learning_rate": 5e-06,
+      "loss": 0.6255,
+      "step": 790
+    },
+    {
+      "epoch": 2.7538726333907055,
+      "grad_norm": 0.6772601640104119,
+      "learning_rate": 5e-06,
+      "loss": 0.6314,
+      "step": 800
+    },
+    {
+      "epoch": 2.7882960413080893,
+      "grad_norm": 0.5940690265376317,
+      "learning_rate": 5e-06,
+      "loss": 0.6261,
+      "step": 810
+    },
+    {
+      "epoch": 2.8227194492254735,
+      "grad_norm": 0.5557807625472435,
+      "learning_rate": 5e-06,
+      "loss": 0.6266,
+      "step": 820
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 0.7023723168127282,
+      "learning_rate": 5e-06,
+      "loss": 0.6278,
+      "step": 830
+    },
+    {
+      "epoch": 2.891566265060241,
+      "grad_norm": 0.5869122563169644,
+      "learning_rate": 5e-06,
+      "loss": 0.6272,
+      "step": 840
+    },
+    {
+      "epoch": 2.9259896729776247,
+      "grad_norm": 0.6112033798118853,
+      "learning_rate": 5e-06,
+      "loss": 0.6304,
+      "step": 850
+    },
+    {
+      "epoch": 2.960413080895009,
+      "grad_norm": 0.6445615202118182,
+      "learning_rate": 5e-06,
+      "loss": 0.631,
+      "step": 860
+    },
+    {
+      "epoch": 2.9948364888123926,
+      "grad_norm": 0.6223406143063472,
+      "learning_rate": 5e-06,
+      "loss": 0.6305,
+      "step": 870
+    },
+    {
+      "epoch": 2.9948364888123926,
+      "eval_loss": 0.7126539349555969,
+      "eval_runtime": 315.6621,
+      "eval_samples_per_second": 24.799,
+      "eval_steps_per_second": 0.39,
+      "step": 870
+    },
+    {
+      "epoch": 2.9948364888123926,
+      "step": 870,
+      "total_flos": 1457073023877120.0,
+      "train_loss": 0.6931865083760229,
+      "train_runtime": 51465.7493,
+      "train_samples_per_second": 8.669,
+      "train_steps_per_second": 0.017
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 870,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1457073023877120.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed