Training in progress, epoch 0

Browse files

Files changed (3) hide show

adapter_model.safetensors +1 -1
trainer_state.json +429 -0
training_args.bin +1 -1

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0874bfaca5fce4cdfa39458b5a4e5cfc442b348f2d884d217e16931bc9760de
 size 9034480

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8abe3837e75f4c4bb6a56ae60b667ec33ab7d1e6ae87c70b54862aa786b20c0
 size 9034480

trainer_state.json ADDED Viewed

	@@ -0,0 +1,429 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.87012987012987,
+  "eval_steps": 500,
+  "global_step": 570,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.17316017316017315,
+      "grad_norm": 0.73779296875,
+      "learning_rate": 0.00019984815164333163,
+      "loss": 1.5442,
+      "step": 10
+    },
+    {
+      "epoch": 0.3463203463203463,
+      "grad_norm": 0.67529296875,
+      "learning_rate": 0.00019939306773179497,
+      "loss": 1.4059,
+      "step": 20
+    },
+    {
+      "epoch": 0.5194805194805194,
+      "grad_norm": 0.67724609375,
+      "learning_rate": 0.00019863613034027224,
+      "loss": 1.3684,
+      "step": 30
+    },
+    {
+      "epoch": 0.6926406926406926,
+      "grad_norm": 0.62744140625,
+      "learning_rate": 0.00019757963826274357,
+      "loss": 1.3543,
+      "step": 40
+    },
+    {
+      "epoch": 0.8658008658008658,
+      "grad_norm": 0.5947265625,
+      "learning_rate": 0.00019622680003092503,
+      "loss": 1.3547,
+      "step": 50
+    },
+    {
+      "epoch": 1.0389610389610389,
+      "grad_norm": 0.5986328125,
+      "learning_rate": 0.00019458172417006347,
+      "loss": 1.3248,
+      "step": 60
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "grad_norm": 0.638671875,
+      "learning_rate": 0.00019264940672148018,
+      "loss": 1.2603,
+      "step": 70
+    },
+    {
+      "epoch": 1.3852813852813852,
+      "grad_norm": 0.78857421875,
+      "learning_rate": 0.00019043571606975777,
+      "loss": 1.2683,
+      "step": 80
+    },
+    {
+      "epoch": 1.5584415584415585,
+      "grad_norm": 0.7412109375,
+      "learning_rate": 0.0001879473751206489,
+      "loss": 1.2356,
+      "step": 90
+    },
+    {
+      "epoch": 1.7316017316017316,
+      "grad_norm": 0.70458984375,
+      "learning_rate": 0.00018519194088383273,
+      "loss": 1.2849,
+      "step": 100
+    },
+    {
+      "epoch": 1.9047619047619047,
+      "grad_norm": 0.748046875,
+      "learning_rate": 0.0001821777815225245,
+      "loss": 1.2618,
+      "step": 110
+    },
+    {
+      "epoch": 2.0779220779220777,
+      "grad_norm": 0.68359375,
+      "learning_rate": 0.00017891405093963938,
+      "loss": 1.1707,
+      "step": 120
+    },
+    {
+      "epoch": 2.2510822510822512,
+      "grad_norm": 0.83740234375,
+      "learning_rate": 0.00017541066097768963,
+      "loss": 1.1533,
+      "step": 130
+    },
+    {
+      "epoch": 2.4242424242424243,
+      "grad_norm": 0.81005859375,
+      "learning_rate": 0.00017167825131684513,
+      "loss": 1.1604,
+      "step": 140
+    },
+    {
+      "epoch": 2.5974025974025974,
+      "grad_norm": 0.99462890625,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 1.1347,
+      "step": 150
+    },
+    {
+      "epoch": 2.7705627705627704,
+      "grad_norm": 0.9736328125,
+      "learning_rate": 0.00016357237482099684,
+      "loss": 1.1071,
+      "step": 160
+    },
+    {
+      "epoch": 2.9437229437229435,
+      "grad_norm": 0.89111328125,
+      "learning_rate": 0.00015922352526649803,
+      "loss": 1.1747,
+      "step": 170
+    },
+    {
+      "epoch": 3.116883116883117,
+      "grad_norm": 1.0849609375,
+      "learning_rate": 0.00015469481581224272,
+      "loss": 1.0882,
+      "step": 180
+    },
+    {
+      "epoch": 3.29004329004329,
+      "grad_norm": 1.0166015625,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 1.0027,
+      "step": 190
+    },
+    {
+      "epoch": 3.463203463203463,
+      "grad_norm": 1.0068359375,
+      "learning_rate": 0.00014515333583108896,
+      "loss": 1.059,
+      "step": 200
+    },
+    {
+      "epoch": 3.6363636363636362,
+      "grad_norm": 0.9970703125,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 1.0374,
+      "step": 210
+    },
+    {
+      "epoch": 3.8095238095238093,
+      "grad_norm": 1.0283203125,
+      "learning_rate": 0.00013506375551927547,
+      "loss": 0.9911,
+      "step": 220
+    },
+    {
+      "epoch": 3.982683982683983,
+      "grad_norm": 1.0732421875,
+      "learning_rate": 0.00012985148110016947,
+      "loss": 1.089,
+      "step": 230
+    },
+    {
+      "epoch": 4.1558441558441555,
+      "grad_norm": 2.0078125,
+      "learning_rate": 0.00012454854871407994,
+      "loss": 0.9512,
+      "step": 240
+    },
+    {
+      "epoch": 4.329004329004329,
+      "grad_norm": 1.2607421875,
+      "learning_rate": 0.00011917106319237386,
+      "loss": 0.9437,
+      "step": 250
+    },
+    {
+      "epoch": 4.5021645021645025,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.00011373535578184082,
+      "loss": 0.9019,
+      "step": 260
+    },
+    {
+      "epoch": 4.675324675324675,
+      "grad_norm": 1.3486328125,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 0.9206,
+      "step": 270
+    },
+    {
+      "epoch": 4.848484848484849,
+      "grad_norm": 1.1474609375,
+      "learning_rate": 0.00010275543423681621,
+      "loss": 0.9218,
+      "step": 280
+    },
+    {
+      "epoch": 5.021645021645021,
+      "grad_norm": 1.119140625,
+      "learning_rate": 9.724456576318381e-05,
+      "loss": 0.8835,
+      "step": 290
+    },
+    {
+      "epoch": 5.194805194805195,
+      "grad_norm": 1.25390625,
+      "learning_rate": 9.174206545276677e-05,
+      "loss": 0.8657,
+      "step": 300
+    },
+    {
+      "epoch": 5.367965367965368,
+      "grad_norm": 1.2841796875,
+      "learning_rate": 8.626464421815919e-05,
+      "loss": 0.8135,
+      "step": 310
+    },
+    {
+      "epoch": 5.541125541125541,
+      "grad_norm": 1.302734375,
+      "learning_rate": 8.082893680762619e-05,
+      "loss": 0.828,
+      "step": 320
+    },
+    {
+      "epoch": 5.714285714285714,
+      "grad_norm": 1.255859375,
+      "learning_rate": 7.54514512859201e-05,
+      "loss": 0.8363,
+      "step": 330
+    },
+    {
+      "epoch": 5.887445887445887,
+      "grad_norm": 1.2783203125,
+      "learning_rate": 7.014851889983057e-05,
+      "loss": 0.8205,
+      "step": 340
+    },
+    {
+      "epoch": 6.0606060606060606,
+      "grad_norm": 1.15234375,
+      "learning_rate": 6.493624448072457e-05,
+      "loss": 0.8129,
+      "step": 350
+    },
+    {
+      "epoch": 6.233766233766234,
+      "grad_norm": 1.314453125,
+      "learning_rate": 5.983045753470308e-05,
+      "loss": 0.7766,
+      "step": 360
+    },
+    {
+      "epoch": 6.406926406926407,
+      "grad_norm": 1.556640625,
+      "learning_rate": 5.484666416891109e-05,
+      "loss": 0.7152,
+      "step": 370
+    },
+    {
+      "epoch": 6.58008658008658,
+      "grad_norm": 1.3330078125,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 0.7112,
+      "step": 380
+    },
+    {
+      "epoch": 6.753246753246753,
+      "grad_norm": 1.3564453125,
+      "learning_rate": 4.530518418775733e-05,
+      "loss": 0.7955,
+      "step": 390
+    },
+    {
+      "epoch": 6.926406926406926,
+      "grad_norm": 1.31640625,
+      "learning_rate": 4.077647473350201e-05,
+      "loss": 0.8241,
+      "step": 400
+    },
+    {
+      "epoch": 7.0995670995671,
+      "grad_norm": 1.4013671875,
+      "learning_rate": 3.642762517900322e-05,
+      "loss": 0.7759,
+      "step": 410
+    },
+    {
+      "epoch": 7.2727272727272725,
+      "grad_norm": 1.4345703125,
+      "learning_rate": 3.227184283742591e-05,
+      "loss": 0.747,
+      "step": 420
+    },
+    {
+      "epoch": 7.445887445887446,
+      "grad_norm": 1.4423828125,
+      "learning_rate": 2.8321748683154893e-05,
+      "loss": 0.7206,
+      "step": 430
+    },
+    {
+      "epoch": 7.619047619047619,
+      "grad_norm": 1.599609375,
+      "learning_rate": 2.4589339022310386e-05,
+      "loss": 0.6764,
+      "step": 440
+    },
+    {
+      "epoch": 7.792207792207792,
+      "grad_norm": 1.3486328125,
+      "learning_rate": 2.1085949060360654e-05,
+      "loss": 0.7336,
+      "step": 450
+    },
+    {
+      "epoch": 7.965367965367966,
+      "grad_norm": 1.576171875,
+      "learning_rate": 1.7822218477475494e-05,
+      "loss": 0.6992,
+      "step": 460
+    },
+    {
+      "epoch": 8.13852813852814,
+      "grad_norm": 1.560546875,
+      "learning_rate": 1.4808059116167305e-05,
+      "loss": 0.6818,
+      "step": 470
+    },
+    {
+      "epoch": 8.311688311688311,
+      "grad_norm": 1.6240234375,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 0.7221,
+      "step": 480
+    },
+    {
+      "epoch": 8.484848484848484,
+      "grad_norm": 1.431640625,
+      "learning_rate": 9.564283930242257e-06,
+      "loss": 0.6785,
+      "step": 490
+    },
+    {
+      "epoch": 8.658008658008658,
+      "grad_norm": 1.4521484375,
+      "learning_rate": 7.350593278519824e-06,
+      "loss": 0.6701,
+      "step": 500
+    },
+    {
+      "epoch": 8.831168831168831,
+      "grad_norm": 1.46484375,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 0.695,
+      "step": 510
+    },
+    {
+      "epoch": 9.004329004329005,
+      "grad_norm": 1.3701171875,
+      "learning_rate": 3.7731999690749585e-06,
+      "loss": 0.6976,
+      "step": 520
+    },
+    {
+      "epoch": 9.177489177489177,
+      "grad_norm": 1.3154296875,
+      "learning_rate": 2.420361737256438e-06,
+      "loss": 0.6386,
+      "step": 530
+    },
+    {
+      "epoch": 9.35064935064935,
+      "grad_norm": 1.5068359375,
+      "learning_rate": 1.3638696597277679e-06,
+      "loss": 0.706,
+      "step": 540
+    },
+    {
+      "epoch": 9.523809523809524,
+      "grad_norm": 1.5166015625,
+      "learning_rate": 6.069322682050516e-07,
+      "loss": 0.6972,
+      "step": 550
+    },
+    {
+      "epoch": 9.696969696969697,
+      "grad_norm": 1.2392578125,
+      "learning_rate": 1.518483566683826e-07,
+      "loss": 0.6888,
+      "step": 560
+    },
+    {
+      "epoch": 9.87012987012987,
+      "grad_norm": 1.4599609375,
+      "learning_rate": 0.0,
+      "loss": 0.6703,
+      "step": 570
+    },
+    {
+      "epoch": 9.87012987012987,
+      "step": 570,
+      "total_flos": 1.455489640562688e+16,
+      "train_loss": 0.9482885511297928,
+      "train_runtime": 548.5236,
+      "train_samples_per_second": 4.211,
+      "train_steps_per_second": 1.039
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 570,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.455489640562688e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d64871858de1a1d669885d62a94360ac8247d345a20c7497fbfd5750ef21923
 size 4539

 version https://git-lfs.github.com/spec/v1
+oid sha256:1273e270b14d56ea307a3f8cd43046a55903043c7fd9040e99bcaeaff1026887
 size 4539