End of training

Browse files

Files changed (5) hide show

README.md +2 -1
all_results.json +8 -0
train_results.json +8 -0
trainer_state.json +861 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: mit
 base_model: microsoft/phi-4
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: phi-4-openalex
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # phi-4-openalex
-This model is a fine-tuned version of [microsoft/phi-4](https://huggingface.co/microsoft/phi-4) on an unknown dataset.
 ## Model description

 base_model: microsoft/phi-4
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: phi-4-openalex
 # phi-4-openalex
+This model is a fine-tuned version of [microsoft/phi-4](https://huggingface.co/microsoft/phi-4) on the openalex dataset.
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 609898777804800.0,
+    "train_loss": 1.8114001343392918,
+    "train_runtime": 22537.3565,
+    "train_samples_per_second": 1.328,
+    "train_steps_per_second": 0.005
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 609898777804800.0,
+    "train_loss": 1.8114001343392918,
+    "train_runtime": 22537.3565,
+    "train_samples_per_second": 1.328,
+    "train_steps_per_second": 0.005
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,861 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 117,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008547008547008548,
+      "grad_norm": 0.13582941889762878,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 1.8884,
+      "step": 1
+    },
+    {
+      "epoch": 0.017094017094017096,
+      "grad_norm": 0.13621631264686584,
+      "learning_rate": 3.3333333333333333e-06,
+      "loss": 1.8929,
+      "step": 2
+    },
+    {
+      "epoch": 0.02564102564102564,
+      "grad_norm": 0.11699245870113373,
+      "learning_rate": 5e-06,
+      "loss": 1.9025,
+      "step": 3
+    },
+    {
+      "epoch": 0.03418803418803419,
+      "grad_norm": 0.05705893784761429,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 1.8795,
+      "step": 4
+    },
+    {
+      "epoch": 0.042735042735042736,
+      "grad_norm": 0.056257281452417374,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 1.8705,
+      "step": 5
+    },
+    {
+      "epoch": 0.05128205128205128,
+      "grad_norm": 0.04789767786860466,
+      "learning_rate": 1e-05,
+      "loss": 1.8572,
+      "step": 6
+    },
+    {
+      "epoch": 0.05982905982905983,
+      "grad_norm": 0.08197996765375137,
+      "learning_rate": 9.99799753559161e-06,
+      "loss": 1.8696,
+      "step": 7
+    },
+    {
+      "epoch": 0.06837606837606838,
+      "grad_norm": 0.05935126915574074,
+      "learning_rate": 9.991991746311916e-06,
+      "loss": 1.8535,
+      "step": 8
+    },
+    {
+      "epoch": 0.07692307692307693,
+      "grad_norm": 0.08124881237745285,
+      "learning_rate": 9.981987442712634e-06,
+      "loss": 1.8623,
+      "step": 9
+    },
+    {
+      "epoch": 0.08547008547008547,
+      "grad_norm": 0.038786403834819794,
+      "learning_rate": 9.967992638098517e-06,
+      "loss": 1.8575,
+      "step": 10
+    },
+    {
+      "epoch": 0.09401709401709402,
+      "grad_norm": 0.061246760189533234,
+      "learning_rate": 9.950018542108818e-06,
+      "loss": 1.8533,
+      "step": 11
+    },
+    {
+      "epoch": 0.10256410256410256,
+      "grad_norm": 0.052762433886528015,
+      "learning_rate": 9.928079551738542e-06,
+      "loss": 1.8447,
+      "step": 12
+    },
+    {
+      "epoch": 0.1111111111111111,
+      "grad_norm": 0.045264095067977905,
+      "learning_rate": 9.902193239806634e-06,
+      "loss": 1.8458,
+      "step": 13
+    },
+    {
+      "epoch": 0.11965811965811966,
+      "grad_norm": 0.049116991460323334,
+      "learning_rate": 9.872380340880416e-06,
+      "loss": 1.8447,
+      "step": 14
+    },
+    {
+      "epoch": 0.1282051282051282,
+      "grad_norm": 0.04788421094417572,
+      "learning_rate": 9.838664734667496e-06,
+      "loss": 1.8474,
+      "step": 15
+    },
+    {
+      "epoch": 0.13675213675213677,
+      "grad_norm": 0.03990117087960243,
+      "learning_rate": 9.801073426888447e-06,
+      "loss": 1.8424,
+      "step": 16
+    },
+    {
+      "epoch": 0.1452991452991453,
+      "grad_norm": 0.0356304794549942,
+      "learning_rate": 9.759636527645633e-06,
+      "loss": 1.8274,
+      "step": 17
+    },
+    {
+      "epoch": 0.15384615384615385,
+      "grad_norm": 0.04112239554524422,
+      "learning_rate": 9.714387227305422e-06,
+      "loss": 1.8262,
+      "step": 18
+    },
+    {
+      "epoch": 0.1623931623931624,
+      "grad_norm": 0.04289136081933975,
+      "learning_rate": 9.665361769913187e-06,
+      "loss": 1.8331,
+      "step": 19
+    },
+    {
+      "epoch": 0.17094017094017094,
+      "grad_norm": 0.03238912671804428,
+      "learning_rate": 9.612599424162344e-06,
+      "loss": 1.8278,
+      "step": 20
+    },
+    {
+      "epoch": 0.1794871794871795,
+      "grad_norm": 0.03170251473784447,
+      "learning_rate": 9.55614245194068e-06,
+      "loss": 1.8238,
+      "step": 21
+    },
+    {
+      "epoch": 0.18803418803418803,
+      "grad_norm": 0.03161657974123955,
+      "learning_rate": 9.496036074479184e-06,
+      "loss": 1.8282,
+      "step": 22
+    },
+    {
+      "epoch": 0.19658119658119658,
+      "grad_norm": 0.028967849910259247,
+      "learning_rate": 9.432328436130493e-06,
+      "loss": 1.825,
+      "step": 23
+    },
+    {
+      "epoch": 0.20512820512820512,
+      "grad_norm": 0.025380073115229607,
+      "learning_rate": 9.365070565805941e-06,
+      "loss": 1.8165,
+      "step": 24
+    },
+    {
+      "epoch": 0.21367521367521367,
+      "grad_norm": 0.027646927163004875,
+      "learning_rate": 9.294316336102132e-06,
+      "loss": 1.8194,
+      "step": 25
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 0.02571098506450653,
+      "learning_rate": 9.220122420149753e-06,
+      "loss": 1.822,
+      "step": 26
+    },
+    {
+      "epoch": 0.23076923076923078,
+      "grad_norm": 0.02322917804121971,
+      "learning_rate": 9.142548246219212e-06,
+      "loss": 1.8218,
+      "step": 27
+    },
+    {
+      "epoch": 0.23931623931623933,
+      "grad_norm": 0.022094452753663063,
+      "learning_rate": 9.06165595011943e-06,
+      "loss": 1.8245,
+      "step": 28
+    },
+    {
+      "epoch": 0.24786324786324787,
+      "grad_norm": 0.025401204824447632,
+      "learning_rate": 8.97751032542795e-06,
+      "loss": 1.8087,
+      "step": 29
+    },
+    {
+      "epoch": 0.2564102564102564,
+      "grad_norm": 0.02527693286538124,
+      "learning_rate": 8.890178771592198e-06,
+      "loss": 1.8097,
+      "step": 30
+    },
+    {
+      "epoch": 0.26495726495726496,
+      "grad_norm": 0.02179970219731331,
+      "learning_rate": 8.799731239943488e-06,
+      "loss": 1.8207,
+      "step": 31
+    },
+    {
+      "epoch": 0.27350427350427353,
+      "grad_norm": 0.01948496885597706,
+      "learning_rate": 8.706240177667003e-06,
+      "loss": 1.8124,
+      "step": 32
+    },
+    {
+      "epoch": 0.28205128205128205,
+      "grad_norm": 0.01962677575647831,
+      "learning_rate": 8.609780469772623e-06,
+      "loss": 1.8184,
+      "step": 33
+    },
+    {
+      "epoch": 0.2905982905982906,
+      "grad_norm": 0.019485613331198692,
+      "learning_rate": 8.510429379113114e-06,
+      "loss": 1.819,
+      "step": 34
+    },
+    {
+      "epoch": 0.29914529914529914,
+      "grad_norm": 0.01905289851129055,
+      "learning_rate": 8.408266484497664e-06,
+      "loss": 1.8153,
+      "step": 35
+    },
+    {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 0.019722452387213707,
+      "learning_rate": 8.303373616950408e-06,
+      "loss": 1.8141,
+      "step": 36
+    },
+    {
+      "epoch": 0.3162393162393162,
+      "grad_norm": 0.02127656526863575,
+      "learning_rate": 8.195834794164925e-06,
+      "loss": 1.8072,
+      "step": 37
+    },
+    {
+      "epoch": 0.3247863247863248,
+      "grad_norm": 0.020507801324129105,
+      "learning_rate": 8.085736153207277e-06,
+      "loss": 1.8171,
+      "step": 38
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.016468843445181847,
+      "learning_rate": 7.973165881521435e-06,
+      "loss": 1.8192,
+      "step": 39
+    },
+    {
+      "epoch": 0.3418803418803419,
+      "grad_norm": 0.01777919940650463,
+      "learning_rate": 7.858214146292394e-06,
+      "loss": 1.8069,
+      "step": 40
+    },
+    {
+      "epoch": 0.3504273504273504,
+      "grad_norm": 0.01822058856487274,
+      "learning_rate": 7.74097302222355e-06,
+      "loss": 1.8093,
+      "step": 41
+    },
+    {
+      "epoch": 0.358974358974359,
+      "grad_norm": 0.0196178387850523,
+      "learning_rate": 7.621536417786159e-06,
+      "loss": 1.8014,
+      "step": 42
+    },
+    {
+      "epoch": 0.36752136752136755,
+      "grad_norm": 0.017990173771977425,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 1.8108,
+      "step": 43
+    },
+    {
+      "epoch": 0.37606837606837606,
+      "grad_norm": 0.01827268861234188,
+      "learning_rate": 7.37646111780545e-06,
+      "loss": 1.8069,
+      "step": 44
+    },
+    {
+      "epoch": 0.38461538461538464,
+      "grad_norm": 0.016010349616408348,
+      "learning_rate": 7.251018724088367e-06,
+      "loss": 1.8057,
+      "step": 45
+    },
+    {
+      "epoch": 0.39316239316239315,
+      "grad_norm": 0.01656198874115944,
+      "learning_rate": 7.12377329642024e-06,
+      "loss": 1.8065,
+      "step": 46
+    },
+    {
+      "epoch": 0.4017094017094017,
+      "grad_norm": 0.01725705899298191,
+      "learning_rate": 6.994826756577082e-06,
+      "loss": 1.7997,
+      "step": 47
+    },
+    {
+      "epoch": 0.41025641025641024,
+      "grad_norm": 0.01705297827720642,
+      "learning_rate": 6.864282388901544e-06,
+      "loss": 1.8073,
+      "step": 48
+    },
+    {
+      "epoch": 0.4188034188034188,
+      "grad_norm": 0.015689667314291,
+      "learning_rate": 6.732244757573619e-06,
+      "loss": 1.8056,
+      "step": 49
+    },
+    {
+      "epoch": 0.42735042735042733,
+      "grad_norm": 0.01496344804763794,
+      "learning_rate": 6.598819622856227e-06,
+      "loss": 1.807,
+      "step": 50
+    },
+    {
+      "epoch": 0.4358974358974359,
+      "grad_norm": 0.015072242356836796,
+      "learning_rate": 6.464113856382752e-06,
+      "loss": 1.8098,
+      "step": 51
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 0.015582077205181122,
+      "learning_rate": 6.328235355554382e-06,
+      "loss": 1.8016,
+      "step": 52
+    },
+    {
+      "epoch": 0.452991452991453,
+      "grad_norm": 0.015510096214711666,
+      "learning_rate": 6.191292957115825e-06,
+      "loss": 1.8111,
+      "step": 53
+    },
+    {
+      "epoch": 0.46153846153846156,
+      "grad_norm": 0.01476968452334404,
+      "learning_rate": 6.053396349978632e-06,
+      "loss": 1.8038,
+      "step": 54
+    },
+    {
+      "epoch": 0.4700854700854701,
+      "grad_norm": 0.014734070748090744,
+      "learning_rate": 5.914655987361934e-06,
+      "loss": 1.7871,
+      "step": 55
+    },
+    {
+      "epoch": 0.47863247863247865,
+      "grad_norm": 0.015119190327823162,
+      "learning_rate": 5.77518299832099e-06,
+      "loss": 1.8018,
+      "step": 56
+    },
+    {
+      "epoch": 0.48717948717948717,
+      "grad_norm": 0.01506562065333128,
+      "learning_rate": 5.635089098734394e-06,
+      "loss": 1.8044,
+      "step": 57
+    },
+    {
+      "epoch": 0.49572649572649574,
+      "grad_norm": 0.014175213873386383,
+      "learning_rate": 5.49448650182125e-06,
+      "loss": 1.8005,
+      "step": 58
+    },
+    {
+      "epoch": 0.5042735042735043,
+      "grad_norm": 0.014436254277825356,
+      "learning_rate": 5.353487828259973e-06,
+      "loss": 1.8026,
+      "step": 59
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 0.014683015644550323,
+      "learning_rate": 5.212206015980742e-06,
+      "loss": 1.7954,
+      "step": 60
+    },
+    {
+      "epoch": 0.5213675213675214,
+      "grad_norm": 0.014612887986004353,
+      "learning_rate": 5.070754229703811e-06,
+      "loss": 1.7961,
+      "step": 61
+    },
+    {
+      "epoch": 0.5299145299145299,
+      "grad_norm": 0.014702665619552135,
+      "learning_rate": 4.929245770296191e-06,
+      "loss": 1.8036,
+      "step": 62
+    },
+    {
+      "epoch": 0.5384615384615384,
+      "grad_norm": 0.014264926314353943,
+      "learning_rate": 4.78779398401926e-06,
+      "loss": 1.8029,
+      "step": 63
+    },
+    {
+      "epoch": 0.5470085470085471,
+      "grad_norm": 0.014685841277241707,
+      "learning_rate": 4.646512171740028e-06,
+      "loss": 1.8026,
+      "step": 64
+    },
+    {
+      "epoch": 0.5555555555555556,
+      "grad_norm": 0.01473164837807417,
+      "learning_rate": 4.505513498178752e-06,
+      "loss": 1.8079,
+      "step": 65
+    },
+    {
+      "epoch": 0.5641025641025641,
+      "grad_norm": 0.014097092673182487,
+      "learning_rate": 4.364910901265607e-06,
+      "loss": 1.8008,
+      "step": 66
+    },
+    {
+      "epoch": 0.5726495726495726,
+      "grad_norm": 0.014091964811086655,
+      "learning_rate": 4.224817001679011e-06,
+      "loss": 1.7983,
+      "step": 67
+    },
+    {
+      "epoch": 0.5811965811965812,
+      "grad_norm": 0.01420042384415865,
+      "learning_rate": 4.085344012638067e-06,
+      "loss": 1.799,
+      "step": 68
+    },
+    {
+      "epoch": 0.5897435897435898,
+      "grad_norm": 0.01425846852362156,
+      "learning_rate": 3.94660365002137e-06,
+      "loss": 1.7981,
+      "step": 69
+    },
+    {
+      "epoch": 0.5982905982905983,
+      "grad_norm": 0.1001775860786438,
+      "learning_rate": 3.808707042884176e-06,
+      "loss": 1.7983,
+      "step": 70
+    },
+    {
+      "epoch": 0.6068376068376068,
+      "grad_norm": 0.014455100521445274,
+      "learning_rate": 3.6717646444456196e-06,
+      "loss": 1.7924,
+      "step": 71
+    },
+    {
+      "epoch": 0.6153846153846154,
+      "grad_norm": 0.014086514711380005,
+      "learning_rate": 3.5358861436172487e-06,
+      "loss": 1.799,
+      "step": 72
+    },
+    {
+      "epoch": 0.6239316239316239,
+      "grad_norm": 0.015152843669056892,
+      "learning_rate": 3.401180377143774e-06,
+      "loss": 1.8005,
+      "step": 73
+    },
+    {
+      "epoch": 0.6324786324786325,
+      "grad_norm": 0.014293001964688301,
+      "learning_rate": 3.2677552424263836e-06,
+      "loss": 1.7956,
+      "step": 74
+    },
+    {
+      "epoch": 0.6410256410256411,
+      "grad_norm": 0.014682993292808533,
+      "learning_rate": 3.1357176110984578e-06,
+      "loss": 1.798,
+      "step": 75
+    },
+    {
+      "epoch": 0.6495726495726496,
+      "grad_norm": 0.014120382256805897,
+      "learning_rate": 3.0051732434229185e-06,
+      "loss": 1.7934,
+      "step": 76
+    },
+    {
+      "epoch": 0.6581196581196581,
+      "grad_norm": 0.014230918139219284,
+      "learning_rate": 2.8762267035797607e-06,
+      "loss": 1.8012,
+      "step": 77
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.01403116900473833,
+      "learning_rate": 2.748981275911633e-06,
+      "loss": 1.7915,
+      "step": 78
+    },
+    {
+      "epoch": 0.6752136752136753,
+      "grad_norm": 0.013814397156238556,
+      "learning_rate": 2.6235388821945497e-06,
+      "loss": 1.7867,
+      "step": 79
+    },
+    {
+      "epoch": 0.6837606837606838,
+      "grad_norm": 0.013797773979604244,
+      "learning_rate": 2.5000000000000015e-06,
+      "loss": 1.7952,
+      "step": 80
+    },
+    {
+      "epoch": 0.6923076923076923,
+      "grad_norm": 0.013870066963136196,
+      "learning_rate": 2.3784635822138424e-06,
+      "loss": 1.8026,
+      "step": 81
+    },
+    {
+      "epoch": 0.7008547008547008,
+      "grad_norm": 0.014055611565709114,
+      "learning_rate": 2.2590269777764516e-06,
+      "loss": 1.7956,
+      "step": 82
+    },
+    {
+      "epoch": 0.7094017094017094,
+      "grad_norm": 0.013872554525732994,
+      "learning_rate": 2.141785853707607e-06,
+      "loss": 1.7947,
+      "step": 83
+    },
+    {
+      "epoch": 0.717948717948718,
+      "grad_norm": 0.013987288810312748,
+      "learning_rate": 2.0268341184785674e-06,
+      "loss": 1.7963,
+      "step": 84
+    },
+    {
+      "epoch": 0.7264957264957265,
+      "grad_norm": 0.0137988505885005,
+      "learning_rate": 1.9142638467927254e-06,
+      "loss": 1.7974,
+      "step": 85
+    },
+    {
+      "epoch": 0.7350427350427351,
+      "grad_norm": 0.013994401320815086,
+      "learning_rate": 1.8041652058350768e-06,
+      "loss": 1.794,
+      "step": 86
+    },
+    {
+      "epoch": 0.7435897435897436,
+      "grad_norm": 0.013571497052907944,
+      "learning_rate": 1.6966263830495939e-06,
+      "loss": 1.8017,
+      "step": 87
+    },
+    {
+      "epoch": 0.7521367521367521,
+      "grad_norm": 0.018896423280239105,
+      "learning_rate": 1.5917335155023368e-06,
+      "loss": 1.7874,
+      "step": 88
+    },
+    {
+      "epoch": 0.7606837606837606,
+      "grad_norm": 0.013724449090659618,
+      "learning_rate": 1.4895706208868876e-06,
+      "loss": 1.8012,
+      "step": 89
+    },
+    {
+      "epoch": 0.7692307692307693,
+      "grad_norm": 0.014079892076551914,
+      "learning_rate": 1.390219530227378e-06,
+      "loss": 1.8029,
+      "step": 90
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 0.01355001050978899,
+      "learning_rate": 1.2937598223330006e-06,
+      "loss": 1.7955,
+      "step": 91
+    },
+    {
+      "epoch": 0.7863247863247863,
+      "grad_norm": 0.013746261596679688,
+      "learning_rate": 1.2002687600565138e-06,
+      "loss": 1.7919,
+      "step": 92
+    },
+    {
+      "epoch": 0.7948717948717948,
+      "grad_norm": 0.013557142578065395,
+      "learning_rate": 1.1098212284078037e-06,
+      "loss": 1.794,
+      "step": 93
+    },
+    {
+      "epoch": 0.8034188034188035,
+      "grad_norm": 0.014017937704920769,
+      "learning_rate": 1.0224896745720513e-06,
+      "loss": 1.7978,
+      "step": 94
+    },
+    {
+      "epoch": 0.811965811965812,
+      "grad_norm": 0.013620711863040924,
+      "learning_rate": 9.383440498805712e-07,
+      "loss": 1.8013,
+      "step": 95
+    },
+    {
+      "epoch": 0.8205128205128205,
+      "grad_norm": 0.013980009593069553,
+      "learning_rate": 8.574517537807897e-07,
+      "loss": 1.7928,
+      "step": 96
+    },
+    {
+      "epoch": 0.8290598290598291,
+      "grad_norm": 0.013654535636305809,
+      "learning_rate": 7.798775798502484e-07,
+      "loss": 1.7875,
+      "step": 97
+    },
+    {
+      "epoch": 0.8376068376068376,
+      "grad_norm": 0.013558434322476387,
+      "learning_rate": 7.056836638978698e-07,
+      "loss": 1.7868,
+      "step": 98
+    },
+    {
+      "epoch": 0.8461538461538461,
+      "grad_norm": 0.01349763572216034,
+      "learning_rate": 6.349294341940593e-07,
+      "loss": 1.7947,
+      "step": 99
+    },
+    {
+      "epoch": 0.8547008547008547,
+      "grad_norm": 0.013618433848023415,
+      "learning_rate": 5.676715638695063e-07,
+      "loss": 1.7933,
+      "step": 100
+    },
+    {
+      "epoch": 0.8632478632478633,
+      "grad_norm": 0.013701952062547207,
+      "learning_rate": 5.039639255208156e-07,
+      "loss": 1.7983,
+      "step": 101
+    },
+    {
+      "epoch": 0.8717948717948718,
+      "grad_norm": 0.013862798921763897,
+      "learning_rate": 4.43857548059321e-07,
+      "loss": 1.7923,
+      "step": 102
+    },
+    {
+      "epoch": 0.8803418803418803,
+      "grad_norm": 0.013503090478479862,
+      "learning_rate": 3.87400575837657e-07,
+      "loss": 1.8015,
+      "step": 103
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.01364339329302311,
+      "learning_rate": 3.346382300868134e-07,
+      "loss": 1.7987,
+      "step": 104
+    },
+    {
+      "epoch": 0.8974358974358975,
+      "grad_norm": 0.013393942266702652,
+      "learning_rate": 2.85612772694579e-07,
+      "loss": 1.801,
+      "step": 105
+    },
+    {
+      "epoch": 0.905982905982906,
+      "grad_norm": 0.01394713670015335,
+      "learning_rate": 2.403634723543674e-07,
+      "loss": 1.797,
+      "step": 106
+    },
+    {
+      "epoch": 0.9145299145299145,
+      "grad_norm": 0.013658151961863041,
+      "learning_rate": 1.989265731115525e-07,
+      "loss": 1.7963,
+      "step": 107
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 0.013742955401539803,
+      "learning_rate": 1.6133526533250566e-07,
+      "loss": 1.7919,
+      "step": 108
+    },
+    {
+      "epoch": 0.9316239316239316,
+      "grad_norm": 0.013528905808925629,
+      "learning_rate": 1.2761965911958385e-07,
+      "loss": 1.7973,
+      "step": 109
+    },
+    {
+      "epoch": 0.9401709401709402,
+      "grad_norm": 0.01358366571366787,
+      "learning_rate": 9.780676019336632e-08,
+      "loss": 1.7944,
+      "step": 110
+    },
+    {
+      "epoch": 0.9487179487179487,
+      "grad_norm": 0.017376506701111794,
+      "learning_rate": 7.192044826145772e-08,
+      "loss": 1.7929,
+      "step": 111
+    },
+    {
+      "epoch": 0.9572649572649573,
+      "grad_norm": 0.013746129348874092,
+      "learning_rate": 4.998145789118114e-08,
+      "loss": 1.7953,
+      "step": 112
+    },
+    {
+      "epoch": 0.9658119658119658,
+      "grad_norm": 0.013639306649565697,
+      "learning_rate": 3.2007361901485455e-08,
+      "loss": 1.794,
+      "step": 113
+    },
+    {
+      "epoch": 0.9743589743589743,
+      "grad_norm": 0.013497625477612019,
+      "learning_rate": 1.8012557287367394e-08,
+      "loss": 1.7915,
+      "step": 114
+    },
+    {
+      "epoch": 0.9829059829059829,
+      "grad_norm": 0.013749259524047375,
+      "learning_rate": 8.008253688084888e-09,
+      "loss": 1.7899,
+      "step": 115
+    },
+    {
+      "epoch": 0.9914529914529915,
+      "grad_norm": 0.013962676748633385,
+      "learning_rate": 2.002464408392135e-09,
+      "loss": 1.7893,
+      "step": 116
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.013671280816197395,
+      "learning_rate": 0.0,
+      "loss": 1.7864,
+      "step": 117
+    },
+    {
+      "epoch": 1.0,
+      "step": 117,
+      "total_flos": 609898777804800.0,
+      "train_loss": 1.8114001343392918,
+      "train_runtime": 22537.3565,
+      "train_samples_per_second": 1.328,
+      "train_steps_per_second": 0.005
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 117,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 609898777804800.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_loss.png ADDED Viewed