End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +647 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_alpaca_x2
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # oh_v1.3_alpaca_x2
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7331

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_alpaca_x2
 # oh_v1.3_alpaca_x2
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1.3_alpaca_x2 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7331

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 2.995973154362416,
+    "eval_loss": 0.733113169670105,
+    "eval_runtime": 297.3971,
+    "eval_samples_per_second": 25.313,
+    "eval_steps_per_second": 0.397,
+    "total_flos": 1401796794777600.0,
+    "train_loss": 0.7062144683965503,
+    "train_runtime": 49420.9289,
+    "train_samples_per_second": 8.682,
+    "train_steps_per_second": 0.017
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 2.995973154362416,
+    "eval_loss": 0.733113169670105,
+    "eval_runtime": 297.3971,
+    "eval_samples_per_second": 25.313,
+    "eval_steps_per_second": 0.397
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.995973154362416,
+    "total_flos": 1401796794777600.0,
+    "train_loss": 0.7062144683965503,
+    "train_runtime": 49420.9289,
+    "train_samples_per_second": 8.682,
+    "train_steps_per_second": 0.017
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,647 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.995973154362416,
+  "eval_steps": 500,
+  "global_step": 837,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.035794183445190156,
+      "grad_norm": 8.401638886940637,
+      "learning_rate": 5e-06,
+      "loss": 1.0542,
+      "step": 10
+    },
+    {
+      "epoch": 0.07158836689038031,
+      "grad_norm": 2.3991823729158823,
+      "learning_rate": 5e-06,
+      "loss": 0.9185,
+      "step": 20
+    },
+    {
+      "epoch": 0.10738255033557047,
+      "grad_norm": 1.8009395703582636,
+      "learning_rate": 5e-06,
+      "loss": 0.8832,
+      "step": 30
+    },
+    {
+      "epoch": 0.14317673378076062,
+      "grad_norm": 1.1993029426540307,
+      "learning_rate": 5e-06,
+      "loss": 0.8514,
+      "step": 40
+    },
+    {
+      "epoch": 0.1789709172259508,
+      "grad_norm": 1.2429515479763662,
+      "learning_rate": 5e-06,
+      "loss": 0.8348,
+      "step": 50
+    },
+    {
+      "epoch": 0.21476510067114093,
+      "grad_norm": 1.2280588442944276,
+      "learning_rate": 5e-06,
+      "loss": 0.8123,
+      "step": 60
+    },
+    {
+      "epoch": 0.2505592841163311,
+      "grad_norm": 1.0812304059733735,
+      "learning_rate": 5e-06,
+      "loss": 0.803,
+      "step": 70
+    },
+    {
+      "epoch": 0.28635346756152125,
+      "grad_norm": 1.0179970959347673,
+      "learning_rate": 5e-06,
+      "loss": 0.7897,
+      "step": 80
+    },
+    {
+      "epoch": 0.3221476510067114,
+      "grad_norm": 1.3275681971077118,
+      "learning_rate": 5e-06,
+      "loss": 0.7885,
+      "step": 90
+    },
+    {
+      "epoch": 0.3579418344519016,
+      "grad_norm": 0.9714888938151465,
+      "learning_rate": 5e-06,
+      "loss": 0.7772,
+      "step": 100
+    },
+    {
+      "epoch": 0.39373601789709173,
+      "grad_norm": 1.0884614950165932,
+      "learning_rate": 5e-06,
+      "loss": 0.7786,
+      "step": 110
+    },
+    {
+      "epoch": 0.42953020134228187,
+      "grad_norm": 1.45656642485753,
+      "learning_rate": 5e-06,
+      "loss": 0.7684,
+      "step": 120
+    },
+    {
+      "epoch": 0.465324384787472,
+      "grad_norm": 0.7010636928500263,
+      "learning_rate": 5e-06,
+      "loss": 0.7688,
+      "step": 130
+    },
+    {
+      "epoch": 0.5011185682326622,
+      "grad_norm": 0.7397488137262136,
+      "learning_rate": 5e-06,
+      "loss": 0.7706,
+      "step": 140
+    },
+    {
+      "epoch": 0.5369127516778524,
+      "grad_norm": 0.9526425688427698,
+      "learning_rate": 5e-06,
+      "loss": 0.762,
+      "step": 150
+    },
+    {
+      "epoch": 0.5727069351230425,
+      "grad_norm": 0.7088169136590493,
+      "learning_rate": 5e-06,
+      "loss": 0.7639,
+      "step": 160
+    },
+    {
+      "epoch": 0.6085011185682326,
+      "grad_norm": 0.7571173656145734,
+      "learning_rate": 5e-06,
+      "loss": 0.7568,
+      "step": 170
+    },
+    {
+      "epoch": 0.6442953020134228,
+      "grad_norm": 0.6372701282417548,
+      "learning_rate": 5e-06,
+      "loss": 0.7593,
+      "step": 180
+    },
+    {
+      "epoch": 0.680089485458613,
+      "grad_norm": 0.5399460206729987,
+      "learning_rate": 5e-06,
+      "loss": 0.752,
+      "step": 190
+    },
+    {
+      "epoch": 0.7158836689038032,
+      "grad_norm": 0.7186356757703609,
+      "learning_rate": 5e-06,
+      "loss": 0.7535,
+      "step": 200
+    },
+    {
+      "epoch": 0.7516778523489933,
+      "grad_norm": 0.6839088392647745,
+      "learning_rate": 5e-06,
+      "loss": 0.7509,
+      "step": 210
+    },
+    {
+      "epoch": 0.7874720357941835,
+      "grad_norm": 0.587117040709812,
+      "learning_rate": 5e-06,
+      "loss": 0.7572,
+      "step": 220
+    },
+    {
+      "epoch": 0.8232662192393736,
+      "grad_norm": 0.679563598992802,
+      "learning_rate": 5e-06,
+      "loss": 0.7445,
+      "step": 230
+    },
+    {
+      "epoch": 0.8590604026845637,
+      "grad_norm": 0.5683321619095465,
+      "learning_rate": 5e-06,
+      "loss": 0.7474,
+      "step": 240
+    },
+    {
+      "epoch": 0.8948545861297539,
+      "grad_norm": 0.71166875890661,
+      "learning_rate": 5e-06,
+      "loss": 0.7497,
+      "step": 250
+    },
+    {
+      "epoch": 0.930648769574944,
+      "grad_norm": 0.8045131045063768,
+      "learning_rate": 5e-06,
+      "loss": 0.7463,
+      "step": 260
+    },
+    {
+      "epoch": 0.9664429530201343,
+      "grad_norm": 0.7352070256803627,
+      "learning_rate": 5e-06,
+      "loss": 0.7406,
+      "step": 270
+    },
+    {
+      "epoch": 0.9986577181208054,
+      "eval_loss": 0.7448742389678955,
+      "eval_runtime": 293.8562,
+      "eval_samples_per_second": 25.618,
+      "eval_steps_per_second": 0.402,
+      "step": 279
+    },
+    {
+      "epoch": 1.0022371364653244,
+      "grad_norm": 1.1521591695523132,
+      "learning_rate": 5e-06,
+      "loss": 0.783,
+      "step": 280
+    },
+    {
+      "epoch": 1.0380313199105144,
+      "grad_norm": 0.9493139325679057,
+      "learning_rate": 5e-06,
+      "loss": 0.6946,
+      "step": 290
+    },
+    {
+      "epoch": 1.0738255033557047,
+      "grad_norm": 1.2157290004753143,
+      "learning_rate": 5e-06,
+      "loss": 0.6923,
+      "step": 300
+    },
+    {
+      "epoch": 1.109619686800895,
+      "grad_norm": 1.3499555504652312,
+      "learning_rate": 5e-06,
+      "loss": 0.6925,
+      "step": 310
+    },
+    {
+      "epoch": 1.145413870246085,
+      "grad_norm": 1.245428077765806,
+      "learning_rate": 5e-06,
+      "loss": 0.6938,
+      "step": 320
+    },
+    {
+      "epoch": 1.1812080536912752,
+      "grad_norm": 1.0729932571470133,
+      "learning_rate": 5e-06,
+      "loss": 0.6897,
+      "step": 330
+    },
+    {
+      "epoch": 1.2170022371364653,
+      "grad_norm": 0.6864034681556961,
+      "learning_rate": 5e-06,
+      "loss": 0.6971,
+      "step": 340
+    },
+    {
+      "epoch": 1.2527964205816555,
+      "grad_norm": 0.6794551859864176,
+      "learning_rate": 5e-06,
+      "loss": 0.6912,
+      "step": 350
+    },
+    {
+      "epoch": 1.2885906040268456,
+      "grad_norm": 0.7163040868054562,
+      "learning_rate": 5e-06,
+      "loss": 0.6892,
+      "step": 360
+    },
+    {
+      "epoch": 1.3243847874720358,
+      "grad_norm": 0.6048987435825018,
+      "learning_rate": 5e-06,
+      "loss": 0.6895,
+      "step": 370
+    },
+    {
+      "epoch": 1.360178970917226,
+      "grad_norm": 0.5792902824967668,
+      "learning_rate": 5e-06,
+      "loss": 0.6887,
+      "step": 380
+    },
+    {
+      "epoch": 1.395973154362416,
+      "grad_norm": 0.7880828049853106,
+      "learning_rate": 5e-06,
+      "loss": 0.6868,
+      "step": 390
+    },
+    {
+      "epoch": 1.4317673378076063,
+      "grad_norm": 0.7894027755224914,
+      "learning_rate": 5e-06,
+      "loss": 0.6862,
+      "step": 400
+    },
+    {
+      "epoch": 1.4675615212527964,
+      "grad_norm": 0.8577775477955841,
+      "learning_rate": 5e-06,
+      "loss": 0.6887,
+      "step": 410
+    },
+    {
+      "epoch": 1.5033557046979866,
+      "grad_norm": 0.6168297625737176,
+      "learning_rate": 5e-06,
+      "loss": 0.6866,
+      "step": 420
+    },
+    {
+      "epoch": 1.5391498881431769,
+      "grad_norm": 0.7366230838210155,
+      "learning_rate": 5e-06,
+      "loss": 0.6904,
+      "step": 430
+    },
+    {
+      "epoch": 1.574944071588367,
+      "grad_norm": 0.6622652592180838,
+      "learning_rate": 5e-06,
+      "loss": 0.6944,
+      "step": 440
+    },
+    {
+      "epoch": 1.610738255033557,
+      "grad_norm": 0.569716627527462,
+      "learning_rate": 5e-06,
+      "loss": 0.6848,
+      "step": 450
+    },
+    {
+      "epoch": 1.6465324384787472,
+      "grad_norm": 0.6235847507038318,
+      "learning_rate": 5e-06,
+      "loss": 0.6873,
+      "step": 460
+    },
+    {
+      "epoch": 1.6823266219239374,
+      "grad_norm": 0.6482653709805161,
+      "learning_rate": 5e-06,
+      "loss": 0.6834,
+      "step": 470
+    },
+    {
+      "epoch": 1.7181208053691275,
+      "grad_norm": 0.5969794316920956,
+      "learning_rate": 5e-06,
+      "loss": 0.6831,
+      "step": 480
+    },
+    {
+      "epoch": 1.7539149888143175,
+      "grad_norm": 0.6436900491750924,
+      "learning_rate": 5e-06,
+      "loss": 0.6842,
+      "step": 490
+    },
+    {
+      "epoch": 1.7897091722595078,
+      "grad_norm": 0.5633091053168701,
+      "learning_rate": 5e-06,
+      "loss": 0.687,
+      "step": 500
+    },
+    {
+      "epoch": 1.825503355704698,
+      "grad_norm": 0.6798124363156657,
+      "learning_rate": 5e-06,
+      "loss": 0.6891,
+      "step": 510
+    },
+    {
+      "epoch": 1.8612975391498883,
+      "grad_norm": 0.6792397317268221,
+      "learning_rate": 5e-06,
+      "loss": 0.6906,
+      "step": 520
+    },
+    {
+      "epoch": 1.8970917225950783,
+      "grad_norm": 0.6285135955779139,
+      "learning_rate": 5e-06,
+      "loss": 0.684,
+      "step": 530
+    },
+    {
+      "epoch": 1.9328859060402683,
+      "grad_norm": 0.5601176877036173,
+      "learning_rate": 5e-06,
+      "loss": 0.6882,
+      "step": 540
+    },
+    {
+      "epoch": 1.9686800894854586,
+      "grad_norm": 0.7243308708474905,
+      "learning_rate": 5e-06,
+      "loss": 0.6867,
+      "step": 550
+    },
+    {
+      "epoch": 1.9973154362416108,
+      "eval_loss": 0.7317517399787903,
+      "eval_runtime": 294.6356,
+      "eval_samples_per_second": 25.55,
+      "eval_steps_per_second": 0.4,
+      "step": 558
+    },
+    {
+      "epoch": 2.004474272930649,
+      "grad_norm": 0.99037695759716,
+      "learning_rate": 5e-06,
+      "loss": 0.7203,
+      "step": 560
+    },
+    {
+      "epoch": 2.040268456375839,
+      "grad_norm": 0.9902193880849315,
+      "learning_rate": 5e-06,
+      "loss": 0.6303,
+      "step": 570
+    },
+    {
+      "epoch": 2.076062639821029,
+      "grad_norm": 0.6397622212118742,
+      "learning_rate": 5e-06,
+      "loss": 0.6299,
+      "step": 580
+    },
+    {
+      "epoch": 2.111856823266219,
+      "grad_norm": 0.7917691287759923,
+      "learning_rate": 5e-06,
+      "loss": 0.6341,
+      "step": 590
+    },
+    {
+      "epoch": 2.1476510067114094,
+      "grad_norm": 0.6240987698183383,
+      "learning_rate": 5e-06,
+      "loss": 0.6354,
+      "step": 600
+    },
+    {
+      "epoch": 2.1834451901565997,
+      "grad_norm": 0.7599713166789355,
+      "learning_rate": 5e-06,
+      "loss": 0.6307,
+      "step": 610
+    },
+    {
+      "epoch": 2.21923937360179,
+      "grad_norm": 0.7131126040530179,
+      "learning_rate": 5e-06,
+      "loss": 0.6333,
+      "step": 620
+    },
+    {
+      "epoch": 2.2550335570469797,
+      "grad_norm": 0.8259088835615077,
+      "learning_rate": 5e-06,
+      "loss": 0.6358,
+      "step": 630
+    },
+    {
+      "epoch": 2.29082774049217,
+      "grad_norm": 0.6541826430581148,
+      "learning_rate": 5e-06,
+      "loss": 0.6352,
+      "step": 640
+    },
+    {
+      "epoch": 2.3266219239373602,
+      "grad_norm": 0.7050135787324793,
+      "learning_rate": 5e-06,
+      "loss": 0.6355,
+      "step": 650
+    },
+    {
+      "epoch": 2.3624161073825505,
+      "grad_norm": 0.7052621683849559,
+      "learning_rate": 5e-06,
+      "loss": 0.6337,
+      "step": 660
+    },
+    {
+      "epoch": 2.3982102908277403,
+      "grad_norm": 0.7256733119248161,
+      "learning_rate": 5e-06,
+      "loss": 0.6361,
+      "step": 670
+    },
+    {
+      "epoch": 2.4340044742729305,
+      "grad_norm": 0.7161998050929107,
+      "learning_rate": 5e-06,
+      "loss": 0.6363,
+      "step": 680
+    },
+    {
+      "epoch": 2.469798657718121,
+      "grad_norm": 0.6120627733035356,
+      "learning_rate": 5e-06,
+      "loss": 0.6361,
+      "step": 690
+    },
+    {
+      "epoch": 2.505592841163311,
+      "grad_norm": 0.6865307447351127,
+      "learning_rate": 5e-06,
+      "loss": 0.6357,
+      "step": 700
+    },
+    {
+      "epoch": 2.5413870246085013,
+      "grad_norm": 0.6728771691791204,
+      "learning_rate": 5e-06,
+      "loss": 0.6378,
+      "step": 710
+    },
+    {
+      "epoch": 2.577181208053691,
+      "grad_norm": 1.1082380037070934,
+      "learning_rate": 5e-06,
+      "loss": 0.6364,
+      "step": 720
+    },
+    {
+      "epoch": 2.6129753914988814,
+      "grad_norm": 0.6069652328826274,
+      "learning_rate": 5e-06,
+      "loss": 0.6397,
+      "step": 730
+    },
+    {
+      "epoch": 2.6487695749440716,
+      "grad_norm": 0.7205831497884968,
+      "learning_rate": 5e-06,
+      "loss": 0.6369,
+      "step": 740
+    },
+    {
+      "epoch": 2.684563758389262,
+      "grad_norm": 0.6012406001171214,
+      "learning_rate": 5e-06,
+      "loss": 0.6382,
+      "step": 750
+    },
+    {
+      "epoch": 2.720357941834452,
+      "grad_norm": 0.5859459819180776,
+      "learning_rate": 5e-06,
+      "loss": 0.6353,
+      "step": 760
+    },
+    {
+      "epoch": 2.756152125279642,
+      "grad_norm": 0.7431570789548247,
+      "learning_rate": 5e-06,
+      "loss": 0.6398,
+      "step": 770
+    },
+    {
+      "epoch": 2.791946308724832,
+      "grad_norm": 0.6535586087228809,
+      "learning_rate": 5e-06,
+      "loss": 0.6411,
+      "step": 780
+    },
+    {
+      "epoch": 2.8277404921700224,
+      "grad_norm": 0.6893473487499976,
+      "learning_rate": 5e-06,
+      "loss": 0.6367,
+      "step": 790
+    },
+    {
+      "epoch": 2.8635346756152127,
+      "grad_norm": 0.6911347153406258,
+      "learning_rate": 5e-06,
+      "loss": 0.6361,
+      "step": 800
+    },
+    {
+      "epoch": 2.899328859060403,
+      "grad_norm": 0.8336045673864524,
+      "learning_rate": 5e-06,
+      "loss": 0.6442,
+      "step": 810
+    },
+    {
+      "epoch": 2.9351230425055927,
+      "grad_norm": 0.730089605426641,
+      "learning_rate": 5e-06,
+      "loss": 0.6395,
+      "step": 820
+    },
+    {
+      "epoch": 2.970917225950783,
+      "grad_norm": 0.68005844887895,
+      "learning_rate": 5e-06,
+      "loss": 0.64,
+      "step": 830
+    },
+    {
+      "epoch": 2.995973154362416,
+      "eval_loss": 0.733113169670105,
+      "eval_runtime": 295.0877,
+      "eval_samples_per_second": 25.511,
+      "eval_steps_per_second": 0.4,
+      "step": 837
+    },
+    {
+      "epoch": 2.995973154362416,
+      "step": 837,
+      "total_flos": 1401796794777600.0,
+      "train_loss": 0.7062144683965503,
+      "train_runtime": 49420.9289,
+      "train_samples_per_second": 8.682,
+      "train_steps_per_second": 0.017
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 837,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1401796794777600.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed