End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +626 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_alpaca_x.25
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # oh_v1.3_alpaca_x.25
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7274

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_alpaca_x.25
 # oh_v1.3_alpaca_x.25
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1.3_alpaca_x.25 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7274

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 2.99209669920967,
+    "eval_loss": 0.7273504734039307,
+    "eval_runtime": 288.6407,
+    "eval_samples_per_second": 25.1,
+    "eval_steps_per_second": 0.395,
+    "total_flos": 1346520565678080.0,
+    "train_loss": 0.7017793898558735,
+    "train_runtime": 47601.6388,
+    "train_samples_per_second": 8.675,
+    "train_steps_per_second": 0.017
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 2.99209669920967,
+    "eval_loss": 0.7273504734039307,
+    "eval_runtime": 288.6407,
+    "eval_samples_per_second": 25.1,
+    "eval_steps_per_second": 0.395
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.99209669920967,
+    "total_flos": 1346520565678080.0,
+    "train_loss": 0.7017793898558735,
+    "train_runtime": 47601.6388,
+    "train_samples_per_second": 8.675,
+    "train_steps_per_second": 0.017
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,626 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.99209669920967,
+  "eval_steps": 500,
+  "global_step": 804,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.037192003719200374,
+      "grad_norm": 10.70026537944554,
+      "learning_rate": 5e-06,
+      "loss": 1.0236,
+      "step": 10
+    },
+    {
+      "epoch": 0.07438400743840075,
+      "grad_norm": 2.4683708369251702,
+      "learning_rate": 5e-06,
+      "loss": 0.9061,
+      "step": 20
+    },
+    {
+      "epoch": 0.11157601115760112,
+      "grad_norm": 1.557001339119401,
+      "learning_rate": 5e-06,
+      "loss": 0.8674,
+      "step": 30
+    },
+    {
+      "epoch": 0.1487680148768015,
+      "grad_norm": 0.9397524898555749,
+      "learning_rate": 5e-06,
+      "loss": 0.8384,
+      "step": 40
+    },
+    {
+      "epoch": 0.18596001859600186,
+      "grad_norm": 1.1936988926222905,
+      "learning_rate": 5e-06,
+      "loss": 0.8231,
+      "step": 50
+    },
+    {
+      "epoch": 0.22315202231520223,
+      "grad_norm": 1.435799706744866,
+      "learning_rate": 5e-06,
+      "loss": 0.8033,
+      "step": 60
+    },
+    {
+      "epoch": 0.2603440260344026,
+      "grad_norm": 1.1073029578095623,
+      "learning_rate": 5e-06,
+      "loss": 0.792,
+      "step": 70
+    },
+    {
+      "epoch": 0.297536029753603,
+      "grad_norm": 0.7590034814601881,
+      "learning_rate": 5e-06,
+      "loss": 0.7831,
+      "step": 80
+    },
+    {
+      "epoch": 0.33472803347280333,
+      "grad_norm": 0.7187163632098286,
+      "learning_rate": 5e-06,
+      "loss": 0.7785,
+      "step": 90
+    },
+    {
+      "epoch": 0.3719200371920037,
+      "grad_norm": 0.8381967164711774,
+      "learning_rate": 5e-06,
+      "loss": 0.7677,
+      "step": 100
+    },
+    {
+      "epoch": 0.40911204091120407,
+      "grad_norm": 0.9379366153843277,
+      "learning_rate": 5e-06,
+      "loss": 0.769,
+      "step": 110
+    },
+    {
+      "epoch": 0.44630404463040446,
+      "grad_norm": 1.022992740054695,
+      "learning_rate": 5e-06,
+      "loss": 0.767,
+      "step": 120
+    },
+    {
+      "epoch": 0.48349604834960486,
+      "grad_norm": 0.6758807465967622,
+      "learning_rate": 5e-06,
+      "loss": 0.7648,
+      "step": 130
+    },
+    {
+      "epoch": 0.5206880520688052,
+      "grad_norm": 0.6206586625414877,
+      "learning_rate": 5e-06,
+      "loss": 0.7624,
+      "step": 140
+    },
+    {
+      "epoch": 0.5578800557880056,
+      "grad_norm": 0.7908504795288729,
+      "learning_rate": 5e-06,
+      "loss": 0.7585,
+      "step": 150
+    },
+    {
+      "epoch": 0.595072059507206,
+      "grad_norm": 0.6997124813442255,
+      "learning_rate": 5e-06,
+      "loss": 0.7561,
+      "step": 160
+    },
+    {
+      "epoch": 0.6322640632264063,
+      "grad_norm": 0.9021517727581455,
+      "learning_rate": 5e-06,
+      "loss": 0.7524,
+      "step": 170
+    },
+    {
+      "epoch": 0.6694560669456067,
+      "grad_norm": 1.0929713413971875,
+      "learning_rate": 5e-06,
+      "loss": 0.7569,
+      "step": 180
+    },
+    {
+      "epoch": 0.7066480706648071,
+      "grad_norm": 0.5949383799920382,
+      "learning_rate": 5e-06,
+      "loss": 0.748,
+      "step": 190
+    },
+    {
+      "epoch": 0.7438400743840075,
+      "grad_norm": 0.7114176678865975,
+      "learning_rate": 5e-06,
+      "loss": 0.7457,
+      "step": 200
+    },
+    {
+      "epoch": 0.7810320781032078,
+      "grad_norm": 0.6511538389514762,
+      "learning_rate": 5e-06,
+      "loss": 0.7485,
+      "step": 210
+    },
+    {
+      "epoch": 0.8182240818224081,
+      "grad_norm": 0.6169446521242461,
+      "learning_rate": 5e-06,
+      "loss": 0.7452,
+      "step": 220
+    },
+    {
+      "epoch": 0.8554160855416085,
+      "grad_norm": 0.6679649201553077,
+      "learning_rate": 5e-06,
+      "loss": 0.7413,
+      "step": 230
+    },
+    {
+      "epoch": 0.8926080892608089,
+      "grad_norm": 0.6367051553135998,
+      "learning_rate": 5e-06,
+      "loss": 0.7421,
+      "step": 240
+    },
+    {
+      "epoch": 0.9298000929800093,
+      "grad_norm": 0.6538238961476153,
+      "learning_rate": 5e-06,
+      "loss": 0.7432,
+      "step": 250
+    },
+    {
+      "epoch": 0.9669920966992097,
+      "grad_norm": 0.8807180489777804,
+      "learning_rate": 5e-06,
+      "loss": 0.7459,
+      "step": 260
+    },
+    {
+      "epoch": 0.9967456996745699,
+      "eval_loss": 0.7386682629585266,
+      "eval_runtime": 288.2212,
+      "eval_samples_per_second": 25.137,
+      "eval_steps_per_second": 0.396,
+      "step": 268
+    },
+    {
+      "epoch": 1.00511390051139,
+      "grad_norm": 0.7939367182993704,
+      "learning_rate": 5e-06,
+      "loss": 0.7611,
+      "step": 270
+    },
+    {
+      "epoch": 1.0423059042305904,
+      "grad_norm": 0.8873443926000695,
+      "learning_rate": 5e-06,
+      "loss": 0.6879,
+      "step": 280
+    },
+    {
+      "epoch": 1.0794979079497908,
+      "grad_norm": 0.8271973959498057,
+      "learning_rate": 5e-06,
+      "loss": 0.6857,
+      "step": 290
+    },
+    {
+      "epoch": 1.1166899116689912,
+      "grad_norm": 0.650424701317403,
+      "learning_rate": 5e-06,
+      "loss": 0.6851,
+      "step": 300
+    },
+    {
+      "epoch": 1.1538819153881916,
+      "grad_norm": 0.6802876097167041,
+      "learning_rate": 5e-06,
+      "loss": 0.6887,
+      "step": 310
+    },
+    {
+      "epoch": 1.1910739191073918,
+      "grad_norm": 0.6411203090099377,
+      "learning_rate": 5e-06,
+      "loss": 0.6861,
+      "step": 320
+    },
+    {
+      "epoch": 1.2282659228265922,
+      "grad_norm": 0.6005865715164875,
+      "learning_rate": 5e-06,
+      "loss": 0.6841,
+      "step": 330
+    },
+    {
+      "epoch": 1.2654579265457926,
+      "grad_norm": 0.5935758121572628,
+      "learning_rate": 5e-06,
+      "loss": 0.6888,
+      "step": 340
+    },
+    {
+      "epoch": 1.302649930264993,
+      "grad_norm": 0.692663892802115,
+      "learning_rate": 5e-06,
+      "loss": 0.6867,
+      "step": 350
+    },
+    {
+      "epoch": 1.3398419339841934,
+      "grad_norm": 0.6681511034577128,
+      "learning_rate": 5e-06,
+      "loss": 0.6867,
+      "step": 360
+    },
+    {
+      "epoch": 1.3770339377033938,
+      "grad_norm": 0.7802593590884934,
+      "learning_rate": 5e-06,
+      "loss": 0.6848,
+      "step": 370
+    },
+    {
+      "epoch": 1.4142259414225942,
+      "grad_norm": 0.9804677340757696,
+      "learning_rate": 5e-06,
+      "loss": 0.6893,
+      "step": 380
+    },
+    {
+      "epoch": 1.4514179451417946,
+      "grad_norm": 0.6847329774555626,
+      "learning_rate": 5e-06,
+      "loss": 0.6886,
+      "step": 390
+    },
+    {
+      "epoch": 1.488609948860995,
+      "grad_norm": 0.8352892154706931,
+      "learning_rate": 5e-06,
+      "loss": 0.6859,
+      "step": 400
+    },
+    {
+      "epoch": 1.5258019525801951,
+      "grad_norm": 0.5967270820719447,
+      "learning_rate": 5e-06,
+      "loss": 0.6868,
+      "step": 410
+    },
+    {
+      "epoch": 1.5629939562993957,
+      "grad_norm": 0.5880142959801617,
+      "learning_rate": 5e-06,
+      "loss": 0.6842,
+      "step": 420
+    },
+    {
+      "epoch": 1.600185960018596,
+      "grad_norm": 0.5635288435173463,
+      "learning_rate": 5e-06,
+      "loss": 0.6801,
+      "step": 430
+    },
+    {
+      "epoch": 1.6373779637377965,
+      "grad_norm": 0.6514598287968293,
+      "learning_rate": 5e-06,
+      "loss": 0.6861,
+      "step": 440
+    },
+    {
+      "epoch": 1.6745699674569967,
+      "grad_norm": 0.8211565671387909,
+      "learning_rate": 5e-06,
+      "loss": 0.6826,
+      "step": 450
+    },
+    {
+      "epoch": 1.711761971176197,
+      "grad_norm": 0.656234536726472,
+      "learning_rate": 5e-06,
+      "loss": 0.6828,
+      "step": 460
+    },
+    {
+      "epoch": 1.7489539748953975,
+      "grad_norm": 0.6424316411884232,
+      "learning_rate": 5e-06,
+      "loss": 0.686,
+      "step": 470
+    },
+    {
+      "epoch": 1.786145978614598,
+      "grad_norm": 1.055200713989444,
+      "learning_rate": 5e-06,
+      "loss": 0.6865,
+      "step": 480
+    },
+    {
+      "epoch": 1.8233379823337983,
+      "grad_norm": 0.62784173351756,
+      "learning_rate": 5e-06,
+      "loss": 0.6828,
+      "step": 490
+    },
+    {
+      "epoch": 1.8605299860529985,
+      "grad_norm": 0.6942985618719518,
+      "learning_rate": 5e-06,
+      "loss": 0.6822,
+      "step": 500
+    },
+    {
+      "epoch": 1.897721989772199,
+      "grad_norm": 0.6606054547047716,
+      "learning_rate": 5e-06,
+      "loss": 0.6903,
+      "step": 510
+    },
+    {
+      "epoch": 1.9349139934913993,
+      "grad_norm": 0.6017670933672693,
+      "learning_rate": 5e-06,
+      "loss": 0.6827,
+      "step": 520
+    },
+    {
+      "epoch": 1.9721059972105999,
+      "grad_norm": 0.6807163168349626,
+      "learning_rate": 5e-06,
+      "loss": 0.6846,
+      "step": 530
+    },
+    {
+      "epoch": 1.99814039981404,
+      "eval_loss": 0.7252578735351562,
+      "eval_runtime": 289.9595,
+      "eval_samples_per_second": 24.986,
+      "eval_steps_per_second": 0.393,
+      "step": 537
+    },
+    {
+      "epoch": 2.01022780102278,
+      "grad_norm": 1.162466060623287,
+      "learning_rate": 5e-06,
+      "loss": 0.6895,
+      "step": 540
+    },
+    {
+      "epoch": 2.0474198047419803,
+      "grad_norm": 0.8408033292352894,
+      "learning_rate": 5e-06,
+      "loss": 0.633,
+      "step": 550
+    },
+    {
+      "epoch": 2.084611808461181,
+      "grad_norm": 0.9210969340667359,
+      "learning_rate": 5e-06,
+      "loss": 0.6306,
+      "step": 560
+    },
+    {
+      "epoch": 2.121803812180381,
+      "grad_norm": 1.0327996873074714,
+      "learning_rate": 5e-06,
+      "loss": 0.6268,
+      "step": 570
+    },
+    {
+      "epoch": 2.1589958158995817,
+      "grad_norm": 0.7309858611795417,
+      "learning_rate": 5e-06,
+      "loss": 0.6236,
+      "step": 580
+    },
+    {
+      "epoch": 2.196187819618782,
+      "grad_norm": 0.797584262196217,
+      "learning_rate": 5e-06,
+      "loss": 0.6347,
+      "step": 590
+    },
+    {
+      "epoch": 2.2333798233379825,
+      "grad_norm": 0.5968988938337217,
+      "learning_rate": 5e-06,
+      "loss": 0.6303,
+      "step": 600
+    },
+    {
+      "epoch": 2.2705718270571826,
+      "grad_norm": 0.6433554089375944,
+      "learning_rate": 5e-06,
+      "loss": 0.6397,
+      "step": 610
+    },
+    {
+      "epoch": 2.3077638307763833,
+      "grad_norm": 0.6218944010086928,
+      "learning_rate": 5e-06,
+      "loss": 0.6314,
+      "step": 620
+    },
+    {
+      "epoch": 2.3449558344955834,
+      "grad_norm": 0.5763593732539121,
+      "learning_rate": 5e-06,
+      "loss": 0.6333,
+      "step": 630
+    },
+    {
+      "epoch": 2.3821478382147836,
+      "grad_norm": 0.7290595985655166,
+      "learning_rate": 5e-06,
+      "loss": 0.635,
+      "step": 640
+    },
+    {
+      "epoch": 2.419339841933984,
+      "grad_norm": 0.6299706173541643,
+      "learning_rate": 5e-06,
+      "loss": 0.6327,
+      "step": 650
+    },
+    {
+      "epoch": 2.4565318456531844,
+      "grad_norm": 0.5979725097626455,
+      "learning_rate": 5e-06,
+      "loss": 0.6325,
+      "step": 660
+    },
+    {
+      "epoch": 2.493723849372385,
+      "grad_norm": 0.7200143941899763,
+      "learning_rate": 5e-06,
+      "loss": 0.6304,
+      "step": 670
+    },
+    {
+      "epoch": 2.530915853091585,
+      "grad_norm": 0.7049012156152652,
+      "learning_rate": 5e-06,
+      "loss": 0.6291,
+      "step": 680
+    },
+    {
+      "epoch": 2.568107856810786,
+      "grad_norm": 0.6965936516378921,
+      "learning_rate": 5e-06,
+      "loss": 0.6326,
+      "step": 690
+    },
+    {
+      "epoch": 2.605299860529986,
+      "grad_norm": 0.6586486248284427,
+      "learning_rate": 5e-06,
+      "loss": 0.6359,
+      "step": 700
+    },
+    {
+      "epoch": 2.6424918642491866,
+      "grad_norm": 0.6619077887920902,
+      "learning_rate": 5e-06,
+      "loss": 0.6323,
+      "step": 710
+    },
+    {
+      "epoch": 2.6796838679683868,
+      "grad_norm": 0.6839746749523397,
+      "learning_rate": 5e-06,
+      "loss": 0.6361,
+      "step": 720
+    },
+    {
+      "epoch": 2.716875871687587,
+      "grad_norm": 0.6080087714334472,
+      "learning_rate": 5e-06,
+      "loss": 0.6354,
+      "step": 730
+    },
+    {
+      "epoch": 2.7540678754067875,
+      "grad_norm": 0.6159664816628709,
+      "learning_rate": 5e-06,
+      "loss": 0.6385,
+      "step": 740
+    },
+    {
+      "epoch": 2.791259879125988,
+      "grad_norm": 0.6838698604764274,
+      "learning_rate": 5e-06,
+      "loss": 0.6377,
+      "step": 750
+    },
+    {
+      "epoch": 2.8284518828451883,
+      "grad_norm": 0.6428004205887895,
+      "learning_rate": 5e-06,
+      "loss": 0.6379,
+      "step": 760
+    },
+    {
+      "epoch": 2.8656438865643885,
+      "grad_norm": 0.5859321354937221,
+      "learning_rate": 5e-06,
+      "loss": 0.635,
+      "step": 770
+    },
+    {
+      "epoch": 2.902835890283589,
+      "grad_norm": 0.5843229275997851,
+      "learning_rate": 5e-06,
+      "loss": 0.6373,
+      "step": 780
+    },
+    {
+      "epoch": 2.9400278940027893,
+      "grad_norm": 0.7374252361646211,
+      "learning_rate": 5e-06,
+      "loss": 0.6363,
+      "step": 790
+    },
+    {
+      "epoch": 2.97721989772199,
+      "grad_norm": 0.6415227611860442,
+      "learning_rate": 5e-06,
+      "loss": 0.6343,
+      "step": 800
+    },
+    {
+      "epoch": 2.99209669920967,
+      "eval_loss": 0.7273504734039307,
+      "eval_runtime": 288.8843,
+      "eval_samples_per_second": 25.079,
+      "eval_steps_per_second": 0.395,
+      "step": 804
+    },
+    {
+      "epoch": 2.99209669920967,
+      "step": 804,
+      "total_flos": 1346520565678080.0,
+      "train_loss": 0.7017793898558735,
+      "train_runtime": 47601.6388,
+      "train_samples_per_second": 8.675,
+      "train_steps_per_second": 0.017
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 804,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1346520565678080.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed