Training in progress, epoch 2, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/config.json +89 -0
last-checkpoint/model.safetensors +3 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/preprocessor_config.json +23 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/trainer_state.json +1236 -0
last-checkpoint/training_args.bin +3 -0

last-checkpoint/config.json ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+  "_name_or_path": "microsoft/swin-tiny-patch4-window7-224",
+  "architectures": [
+    "SwinForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "depths": [
+    2,
+    2,
+    6,
+    2
+  ],
+  "drop_path_rate": 0.1,
+  "embed_dim": 96,
+  "encoder_stride": 32,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "e",
+    "1": "es",
+    "2": "n",
+    "3": "ne",
+    "4": "nes",
+    "5": "normal",
+    "6": "ns",
+    "7": "nw",
+    "8": "nwe",
+    "9": "nwes",
+    "10": "nws",
+    "11": "s",
+    "12": "w",
+    "13": "we",
+    "14": "wes",
+    "15": "ws"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "label2id": {
+    "e": 0,
+    "es": 1,
+    "n": 2,
+    "ne": 3,
+    "nes": 4,
+    "normal": 5,
+    "ns": 6,
+    "nw": 7,
+    "nwe": 8,
+    "nwes": 9,
+    "nws": 10,
+    "s": 11,
+    "w": 12,
+    "we": 13,
+    "wes": 14,
+    "ws": 15
+  },
+  "layer_norm_eps": 1e-05,
+  "mlp_ratio": 4.0,
+  "model_type": "swin",
+  "num_channels": 3,
+  "num_heads": [
+    3,
+    6,
+    12,
+    24
+  ],
+  "num_layers": 4,
+  "out_features": [
+    "stage4"
+  ],
+  "out_indices": [
+    4
+  ],
+  "patch_size": 4,
+  "path_norm": true,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "stage_names": [
+    "stem",
+    "stage1",
+    "stage2",
+    "stage3",
+    "stage4"
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.1",
+  "use_absolute_embeddings": false,
+  "window_size": 7
+}

last-checkpoint/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56d50c4af00394a47e8221cf29d52b872eff9ba711b81a9e3bff57df7ecada2b
+size 110385904

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0c0ec9fc16941b41120ee6b8a63c4ef3e6018d08bc56600d663ba1ff57e540f
+size 220436730

last-checkpoint/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e91323fc4954bfb304d75ccd1eaa491100df7060e8351a39f1d8ab42ea15ade9
+size 14244

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18695d2fb12e6cf8ee010a2d0b8fecd135e4aa9b9c6e2717dab0c8212f0b4eb3
+size 1064

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1236 @@

+{
+  "best_metric": 1.0,
+  "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/16_label_check_point/checkpoint-563",
+  "epoch": 2.999111111111111,
+  "eval_steps": 500,
+  "global_step": 1686,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017777777777777778,
+      "grad_norm": 10.457254409790039,
+      "learning_rate": 1.7793594306049826e-06,
+      "loss": 11.2334,
+      "step": 10
+    },
+    {
+      "epoch": 0.035555555555555556,
+      "grad_norm": 6.316349506378174,
+      "learning_rate": 3.558718861209965e-06,
+      "loss": 11.1807,
+      "step": 20
+    },
+    {
+      "epoch": 0.05333333333333334,
+      "grad_norm": 5.264202117919922,
+      "learning_rate": 5.338078291814947e-06,
+      "loss": 11.1463,
+      "step": 30
+    },
+    {
+      "epoch": 0.07111111111111111,
+      "grad_norm": 5.205317497253418,
+      "learning_rate": 7.11743772241993e-06,
+      "loss": 11.0929,
+      "step": 40
+    },
+    {
+      "epoch": 0.08888888888888889,
+      "grad_norm": 4.696351528167725,
+      "learning_rate": 8.896797153024912e-06,
+      "loss": 11.1015,
+      "step": 50
+    },
+    {
+      "epoch": 0.10666666666666667,
+      "grad_norm": 5.9699320793151855,
+      "learning_rate": 1.0676156583629894e-05,
+      "loss": 11.0795,
+      "step": 60
+    },
+    {
+      "epoch": 0.12444444444444444,
+      "grad_norm": 7.235191822052002,
+      "learning_rate": 1.2455516014234877e-05,
+      "loss": 11.046,
+      "step": 70
+    },
+    {
+      "epoch": 0.14222222222222222,
+      "grad_norm": 14.865583419799805,
+      "learning_rate": 1.423487544483986e-05,
+      "loss": 11.0113,
+      "step": 80
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 17.412696838378906,
+      "learning_rate": 1.601423487544484e-05,
+      "loss": 10.8502,
+      "step": 90
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 20.41786003112793,
+      "learning_rate": 1.7793594306049825e-05,
+      "loss": 10.5185,
+      "step": 100
+    },
+    {
+      "epoch": 0.19555555555555557,
+      "grad_norm": 24.58249282836914,
+      "learning_rate": 1.9572953736654805e-05,
+      "loss": 9.8301,
+      "step": 110
+    },
+    {
+      "epoch": 0.21333333333333335,
+      "grad_norm": 43.27064895629883,
+      "learning_rate": 2.135231316725979e-05,
+      "loss": 8.8636,
+      "step": 120
+    },
+    {
+      "epoch": 0.2311111111111111,
+      "grad_norm": 46.2359733581543,
+      "learning_rate": 2.313167259786477e-05,
+      "loss": 7.4813,
+      "step": 130
+    },
+    {
+      "epoch": 0.24888888888888888,
+      "grad_norm": 71.36530303955078,
+      "learning_rate": 2.4911032028469753e-05,
+      "loss": 6.0609,
+      "step": 140
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 61.219024658203125,
+      "learning_rate": 2.669039145907473e-05,
+      "loss": 4.5934,
+      "step": 150
+    },
+    {
+      "epoch": 0.28444444444444444,
+      "grad_norm": 52.64271545410156,
+      "learning_rate": 2.846975088967972e-05,
+      "loss": 3.3049,
+      "step": 160
+    },
+    {
+      "epoch": 0.3022222222222222,
+      "grad_norm": 40.045623779296875,
+      "learning_rate": 3.02491103202847e-05,
+      "loss": 2.0759,
+      "step": 170
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 32.63826370239258,
+      "learning_rate": 3.202846975088968e-05,
+      "loss": 1.2791,
+      "step": 180
+    },
+    {
+      "epoch": 0.3377777777777778,
+      "grad_norm": 39.16119384765625,
+      "learning_rate": 3.380782918149467e-05,
+      "loss": 0.9052,
+      "step": 190
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 36.06990051269531,
+      "learning_rate": 3.558718861209965e-05,
+      "loss": 0.6508,
+      "step": 200
+    },
+    {
+      "epoch": 0.37333333333333335,
+      "grad_norm": 22.00220489501953,
+      "learning_rate": 3.736654804270463e-05,
+      "loss": 0.6293,
+      "step": 210
+    },
+    {
+      "epoch": 0.39111111111111113,
+      "grad_norm": 27.334341049194336,
+      "learning_rate": 3.914590747330961e-05,
+      "loss": 0.5774,
+      "step": 220
+    },
+    {
+      "epoch": 0.4088888888888889,
+      "grad_norm": 21.130746841430664,
+      "learning_rate": 4.09252669039146e-05,
+      "loss": 0.4318,
+      "step": 230
+    },
+    {
+      "epoch": 0.4266666666666667,
+      "grad_norm": 21.37102508544922,
+      "learning_rate": 4.270462633451958e-05,
+      "loss": 0.414,
+      "step": 240
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 86.8355712890625,
+      "learning_rate": 4.448398576512456e-05,
+      "loss": 0.4338,
+      "step": 250
+    },
+    {
+      "epoch": 0.4622222222222222,
+      "grad_norm": 9.731348037719727,
+      "learning_rate": 4.626334519572954e-05,
+      "loss": 0.3978,
+      "step": 260
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 23.59229278564453,
+      "learning_rate": 4.8042704626334526e-05,
+      "loss": 0.2971,
+      "step": 270
+    },
+    {
+      "epoch": 0.49777777777777776,
+      "grad_norm": 10.458292961120605,
+      "learning_rate": 4.9822064056939506e-05,
+      "loss": 0.3015,
+      "step": 280
+    },
+    {
+      "epoch": 0.5155555555555555,
+      "grad_norm": 13.044571876525879,
+      "learning_rate": 4.9822064056939506e-05,
+      "loss": 0.2013,
+      "step": 290
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 30.592700958251953,
+      "learning_rate": 4.962435745353895e-05,
+      "loss": 0.182,
+      "step": 300
+    },
+    {
+      "epoch": 0.5511111111111111,
+      "grad_norm": 26.582555770874023,
+      "learning_rate": 4.9426650850138396e-05,
+      "loss": 0.2276,
+      "step": 310
+    },
+    {
+      "epoch": 0.5688888888888889,
+      "grad_norm": 20.704526901245117,
+      "learning_rate": 4.9228944246737844e-05,
+      "loss": 0.1836,
+      "step": 320
+    },
+    {
+      "epoch": 0.5866666666666667,
+      "grad_norm": 29.400476455688477,
+      "learning_rate": 4.903123764333729e-05,
+      "loss": 0.2921,
+      "step": 330
+    },
+    {
+      "epoch": 0.6044444444444445,
+      "grad_norm": 23.031789779663086,
+      "learning_rate": 4.8833531039936733e-05,
+      "loss": 0.0848,
+      "step": 340
+    },
+    {
+      "epoch": 0.6222222222222222,
+      "grad_norm": 0.9449447393417358,
+      "learning_rate": 4.863582443653618e-05,
+      "loss": 0.0741,
+      "step": 350
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 20.334348678588867,
+      "learning_rate": 4.843811783313563e-05,
+      "loss": 0.1798,
+      "step": 360
+    },
+    {
+      "epoch": 0.6577777777777778,
+      "grad_norm": 13.533489227294922,
+      "learning_rate": 4.824041122973508e-05,
+      "loss": 0.1007,
+      "step": 370
+    },
+    {
+      "epoch": 0.6755555555555556,
+      "grad_norm": 1.5982263088226318,
+      "learning_rate": 4.8042704626334526e-05,
+      "loss": 0.1016,
+      "step": 380
+    },
+    {
+      "epoch": 0.6933333333333334,
+      "grad_norm": 2.323336362838745,
+      "learning_rate": 4.784499802293397e-05,
+      "loss": 0.1131,
+      "step": 390
+    },
+    {
+      "epoch": 0.7111111111111111,
+      "grad_norm": 24.458276748657227,
+      "learning_rate": 4.7647291419533415e-05,
+      "loss": 0.069,
+      "step": 400
+    },
+    {
+      "epoch": 0.7288888888888889,
+      "grad_norm": 29.530794143676758,
+      "learning_rate": 4.7449584816132864e-05,
+      "loss": 0.2415,
+      "step": 410
+    },
+    {
+      "epoch": 0.7466666666666667,
+      "grad_norm": 2.0636749267578125,
+      "learning_rate": 4.725187821273231e-05,
+      "loss": 0.1173,
+      "step": 420
+    },
+    {
+      "epoch": 0.7644444444444445,
+      "grad_norm": 2.225900888442993,
+      "learning_rate": 4.705417160933175e-05,
+      "loss": 0.0918,
+      "step": 430
+    },
+    {
+      "epoch": 0.7822222222222223,
+      "grad_norm": 7.136375904083252,
+      "learning_rate": 4.68564650059312e-05,
+      "loss": 0.1589,
+      "step": 440
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.6402971744537354,
+      "learning_rate": 4.665875840253064e-05,
+      "loss": 0.2547,
+      "step": 450
+    },
+    {
+      "epoch": 0.8177777777777778,
+      "grad_norm": 12.275321960449219,
+      "learning_rate": 4.64610517991301e-05,
+      "loss": 0.1082,
+      "step": 460
+    },
+    {
+      "epoch": 0.8355555555555556,
+      "grad_norm": 0.6636475920677185,
+      "learning_rate": 4.626334519572954e-05,
+      "loss": 0.0986,
+      "step": 470
+    },
+    {
+      "epoch": 0.8533333333333334,
+      "grad_norm": 27.784332275390625,
+      "learning_rate": 4.606563859232899e-05,
+      "loss": 0.1056,
+      "step": 480
+    },
+    {
+      "epoch": 0.8711111111111111,
+      "grad_norm": 22.047527313232422,
+      "learning_rate": 4.586793198892843e-05,
+      "loss": 0.1654,
+      "step": 490
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 15.613493919372559,
+      "learning_rate": 4.5670225385527876e-05,
+      "loss": 0.1517,
+      "step": 500
+    },
+    {
+      "epoch": 0.9066666666666666,
+      "grad_norm": 20.039813995361328,
+      "learning_rate": 4.5472518782127324e-05,
+      "loss": 0.1283,
+      "step": 510
+    },
+    {
+      "epoch": 0.9244444444444444,
+      "grad_norm": 21.469423294067383,
+      "learning_rate": 4.527481217872677e-05,
+      "loss": 0.0707,
+      "step": 520
+    },
+    {
+      "epoch": 0.9422222222222222,
+      "grad_norm": 38.251953125,
+      "learning_rate": 4.5077105575326214e-05,
+      "loss": 0.1116,
+      "step": 530
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 12.317658424377441,
+      "learning_rate": 4.487939897192566e-05,
+      "loss": 0.138,
+      "step": 540
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 5.918329238891602,
+      "learning_rate": 4.468169236852511e-05,
+      "loss": 0.1053,
+      "step": 550
+    },
+    {
+      "epoch": 0.9955555555555555,
+      "grad_norm": 12.31584358215332,
+      "learning_rate": 4.448398576512456e-05,
+      "loss": 0.1354,
+      "step": 560
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 1.0,
+      "eval_loss": 8.097552927210927e-05,
+      "eval_runtime": 74.3955,
+      "eval_samples_per_second": 107.533,
+      "eval_steps_per_second": 3.36,
+      "step": 563
+    },
+    {
+      "epoch": 1.0124444444444445,
+      "grad_norm": 10.894996643066406,
+      "learning_rate": 4.4286279161724006e-05,
+      "loss": 0.1134,
+      "step": 570
+    },
+    {
+      "epoch": 1.0302222222222222,
+      "grad_norm": 16.037940979003906,
+      "learning_rate": 4.408857255832345e-05,
+      "loss": 0.0917,
+      "step": 580
+    },
+    {
+      "epoch": 1.048,
+      "grad_norm": 0.164012148976326,
+      "learning_rate": 4.3890865954922896e-05,
+      "loss": 0.0816,
+      "step": 590
+    },
+    {
+      "epoch": 1.0657777777777777,
+      "grad_norm": 0.05601898953318596,
+      "learning_rate": 4.3693159351522344e-05,
+      "loss": 0.0778,
+      "step": 600
+    },
+    {
+      "epoch": 1.0835555555555556,
+      "grad_norm": 9.240290641784668,
+      "learning_rate": 4.349545274812179e-05,
+      "loss": 0.0893,
+      "step": 610
+    },
+    {
+      "epoch": 1.1013333333333333,
+      "grad_norm": 8.448566436767578,
+      "learning_rate": 4.3297746144721233e-05,
+      "loss": 0.0905,
+      "step": 620
+    },
+    {
+      "epoch": 1.1191111111111112,
+      "grad_norm": 1.9537012577056885,
+      "learning_rate": 4.310003954132068e-05,
+      "loss": 0.1561,
+      "step": 630
+    },
+    {
+      "epoch": 1.1368888888888888,
+      "grad_norm": 25.13317108154297,
+      "learning_rate": 4.290233293792013e-05,
+      "loss": 0.0856,
+      "step": 640
+    },
+    {
+      "epoch": 1.1546666666666667,
+      "grad_norm": 1.3837047815322876,
+      "learning_rate": 4.270462633451958e-05,
+      "loss": 0.0962,
+      "step": 650
+    },
+    {
+      "epoch": 1.1724444444444444,
+      "grad_norm": 46.560367584228516,
+      "learning_rate": 4.250691973111902e-05,
+      "loss": 0.1009,
+      "step": 660
+    },
+    {
+      "epoch": 1.1902222222222223,
+      "grad_norm": 42.40678787231445,
+      "learning_rate": 4.230921312771847e-05,
+      "loss": 0.1027,
+      "step": 670
+    },
+    {
+      "epoch": 1.208,
+      "grad_norm": 1.2823681831359863,
+      "learning_rate": 4.211150652431791e-05,
+      "loss": 0.0609,
+      "step": 680
+    },
+    {
+      "epoch": 1.2257777777777779,
+      "grad_norm": 3.9979896545410156,
+      "learning_rate": 4.1913799920917364e-05,
+      "loss": 0.0884,
+      "step": 690
+    },
+    {
+      "epoch": 1.2435555555555555,
+      "grad_norm": 23.4843692779541,
+      "learning_rate": 4.1716093317516805e-05,
+      "loss": 0.049,
+      "step": 700
+    },
+    {
+      "epoch": 1.2613333333333334,
+      "grad_norm": 0.9539620876312256,
+      "learning_rate": 4.151838671411625e-05,
+      "loss": 0.0481,
+      "step": 710
+    },
+    {
+      "epoch": 1.279111111111111,
+      "grad_norm": 0.23285511136054993,
+      "learning_rate": 4.13206801107157e-05,
+      "loss": 0.0388,
+      "step": 720
+    },
+    {
+      "epoch": 1.2968888888888888,
+      "grad_norm": 24.404285430908203,
+      "learning_rate": 4.112297350731515e-05,
+      "loss": 0.0425,
+      "step": 730
+    },
+    {
+      "epoch": 1.3146666666666667,
+      "grad_norm": 3.143155813217163,
+      "learning_rate": 4.09252669039146e-05,
+      "loss": 0.0313,
+      "step": 740
+    },
+    {
+      "epoch": 1.3324444444444445,
+      "grad_norm": 0.3580031991004944,
+      "learning_rate": 4.072756030051404e-05,
+      "loss": 0.0466,
+      "step": 750
+    },
+    {
+      "epoch": 1.3502222222222222,
+      "grad_norm": 39.96261978149414,
+      "learning_rate": 4.052985369711349e-05,
+      "loss": 0.0706,
+      "step": 760
+    },
+    {
+      "epoch": 1.3679999999999999,
+      "grad_norm": 22.012971878051758,
+      "learning_rate": 4.033214709371293e-05,
+      "loss": 0.1249,
+      "step": 770
+    },
+    {
+      "epoch": 1.3857777777777778,
+      "grad_norm": 0.20229700207710266,
+      "learning_rate": 4.013444049031238e-05,
+      "loss": 0.1089,
+      "step": 780
+    },
+    {
+      "epoch": 1.4035555555555557,
+      "grad_norm": 36.518348693847656,
+      "learning_rate": 3.9936733886911825e-05,
+      "loss": 0.0514,
+      "step": 790
+    },
+    {
+      "epoch": 1.4213333333333333,
+      "grad_norm": 0.011868222616612911,
+      "learning_rate": 3.973902728351127e-05,
+      "loss": 0.0298,
+      "step": 800
+    },
+    {
+      "epoch": 1.439111111111111,
+      "grad_norm": 23.455787658691406,
+      "learning_rate": 3.9541320680110714e-05,
+      "loss": 0.0365,
+      "step": 810
+    },
+    {
+      "epoch": 1.456888888888889,
+      "grad_norm": 0.5454981923103333,
+      "learning_rate": 3.934361407671016e-05,
+      "loss": 0.0603,
+      "step": 820
+    },
+    {
+      "epoch": 1.4746666666666668,
+      "grad_norm": 0.2658223509788513,
+      "learning_rate": 3.914590747330961e-05,
+      "loss": 0.0364,
+      "step": 830
+    },
+    {
+      "epoch": 1.4924444444444445,
+      "grad_norm": 32.6451301574707,
+      "learning_rate": 3.894820086990906e-05,
+      "loss": 0.1105,
+      "step": 840
+    },
+    {
+      "epoch": 1.5102222222222221,
+      "grad_norm": 0.37322714924812317,
+      "learning_rate": 3.87504942665085e-05,
+      "loss": 0.0102,
+      "step": 850
+    },
+    {
+      "epoch": 1.528,
+      "grad_norm": 1.2302775382995605,
+      "learning_rate": 3.855278766310795e-05,
+      "loss": 0.093,
+      "step": 860
+    },
+    {
+      "epoch": 1.545777777777778,
+      "grad_norm": 0.11981203407049179,
+      "learning_rate": 3.8355081059707396e-05,
+      "loss": 0.0721,
+      "step": 870
+    },
+    {
+      "epoch": 1.5635555555555556,
+      "grad_norm": 0.09180541336536407,
+      "learning_rate": 3.8157374456306844e-05,
+      "loss": 0.0408,
+      "step": 880
+    },
+    {
+      "epoch": 1.5813333333333333,
+      "grad_norm": 29.872051239013672,
+      "learning_rate": 3.7959667852906285e-05,
+      "loss": 0.0393,
+      "step": 890
+    },
+    {
+      "epoch": 1.5991111111111111,
+      "grad_norm": 1.0710923671722412,
+      "learning_rate": 3.7761961249505734e-05,
+      "loss": 0.0023,
+      "step": 900
+    },
+    {
+      "epoch": 1.616888888888889,
+      "grad_norm": 20.386173248291016,
+      "learning_rate": 3.756425464610518e-05,
+      "loss": 0.0605,
+      "step": 910
+    },
+    {
+      "epoch": 1.6346666666666667,
+      "grad_norm": 0.03605956584215164,
+      "learning_rate": 3.736654804270463e-05,
+      "loss": 0.0059,
+      "step": 920
+    },
+    {
+      "epoch": 1.6524444444444444,
+      "grad_norm": 0.38812369108200073,
+      "learning_rate": 3.716884143930408e-05,
+      "loss": 0.0819,
+      "step": 930
+    },
+    {
+      "epoch": 1.6702222222222223,
+      "grad_norm": 27.319766998291016,
+      "learning_rate": 3.697113483590352e-05,
+      "loss": 0.0201,
+      "step": 940
+    },
+    {
+      "epoch": 1.688,
+      "grad_norm": 20.58792495727539,
+      "learning_rate": 3.677342823250297e-05,
+      "loss": 0.0678,
+      "step": 950
+    },
+    {
+      "epoch": 1.7057777777777776,
+      "grad_norm": 0.33605310320854187,
+      "learning_rate": 3.6575721629102416e-05,
+      "loss": 0.0027,
+      "step": 960
+    },
+    {
+      "epoch": 1.7235555555555555,
+      "grad_norm": 0.13025720417499542,
+      "learning_rate": 3.6378015025701864e-05,
+      "loss": 0.0029,
+      "step": 970
+    },
+    {
+      "epoch": 1.7413333333333334,
+      "grad_norm": 31.07040023803711,
+      "learning_rate": 3.6180308422301305e-05,
+      "loss": 0.0626,
+      "step": 980
+    },
+    {
+      "epoch": 1.759111111111111,
+      "grad_norm": 0.055677346885204315,
+      "learning_rate": 3.598260181890075e-05,
+      "loss": 0.0079,
+      "step": 990
+    },
+    {
+      "epoch": 1.7768888888888887,
+      "grad_norm": 0.012918527238070965,
+      "learning_rate": 3.5784895215500194e-05,
+      "loss": 0.0308,
+      "step": 1000
+    },
+    {
+      "epoch": 1.7946666666666666,
+      "grad_norm": 12.896405220031738,
+      "learning_rate": 3.558718861209965e-05,
+      "loss": 0.0551,
+      "step": 1010
+    },
+    {
+      "epoch": 1.8124444444444445,
+      "grad_norm": 0.0037423851899802685,
+      "learning_rate": 3.538948200869909e-05,
+      "loss": 0.0437,
+      "step": 1020
+    },
+    {
+      "epoch": 1.8302222222222222,
+      "grad_norm": 0.2329370528459549,
+      "learning_rate": 3.519177540529854e-05,
+      "loss": 0.0281,
+      "step": 1030
+    },
+    {
+      "epoch": 1.8479999999999999,
+      "grad_norm": 0.027233602479100227,
+      "learning_rate": 3.499406880189798e-05,
+      "loss": 0.0136,
+      "step": 1040
+    },
+    {
+      "epoch": 1.8657777777777778,
+      "grad_norm": 0.1924924999475479,
+      "learning_rate": 3.4796362198497435e-05,
+      "loss": 0.056,
+      "step": 1050
+    },
+    {
+      "epoch": 1.8835555555555556,
+      "grad_norm": 0.02651727944612503,
+      "learning_rate": 3.4598655595096876e-05,
+      "loss": 0.0647,
+      "step": 1060
+    },
+    {
+      "epoch": 1.9013333333333333,
+      "grad_norm": 38.13447570800781,
+      "learning_rate": 3.4400948991696325e-05,
+      "loss": 0.0216,
+      "step": 1070
+    },
+    {
+      "epoch": 1.919111111111111,
+      "grad_norm": 11.278813362121582,
+      "learning_rate": 3.420324238829577e-05,
+      "loss": 0.0395,
+      "step": 1080
+    },
+    {
+      "epoch": 1.9368888888888889,
+      "grad_norm": 0.5866456031799316,
+      "learning_rate": 3.4005535784895214e-05,
+      "loss": 0.0433,
+      "step": 1090
+    },
+    {
+      "epoch": 1.9546666666666668,
+      "grad_norm": 2.6308796405792236,
+      "learning_rate": 3.380782918149467e-05,
+      "loss": 0.0118,
+      "step": 1100
+    },
+    {
+      "epoch": 1.9724444444444444,
+      "grad_norm": 0.1729055494070053,
+      "learning_rate": 3.361012257809411e-05,
+      "loss": 0.0251,
+      "step": 1110
+    },
+    {
+      "epoch": 1.9902222222222221,
+      "grad_norm": 10.363531112670898,
+      "learning_rate": 3.341241597469356e-05,
+      "loss": 0.0656,
+      "step": 1120
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 1.0,
+      "eval_loss": 2.5387274945387617e-05,
+      "eval_runtime": 75.7395,
+      "eval_samples_per_second": 105.625,
+      "eval_steps_per_second": 3.301,
+      "step": 1126
+    },
+    {
+      "epoch": 2.010666666666667,
+      "grad_norm": 29.318574905395508,
+      "learning_rate": 3.3214709371293e-05,
+      "loss": 0.2308,
+      "step": 1130
+    },
+    {
+      "epoch": 2.0284444444444443,
+      "grad_norm": 67.69547271728516,
+      "learning_rate": 3.301700276789245e-05,
+      "loss": 0.0604,
+      "step": 1140
+    },
+    {
+      "epoch": 2.046222222222222,
+      "grad_norm": 0.18736213445663452,
+      "learning_rate": 3.2819296164491896e-05,
+      "loss": 0.0546,
+      "step": 1150
+    },
+    {
+      "epoch": 2.064,
+      "grad_norm": 15.664798736572266,
+      "learning_rate": 3.2621589561091344e-05,
+      "loss": 0.0304,
+      "step": 1160
+    },
+    {
+      "epoch": 2.081777777777778,
+      "grad_norm": 0.13788799941539764,
+      "learning_rate": 3.2423882957690785e-05,
+      "loss": 0.0029,
+      "step": 1170
+    },
+    {
+      "epoch": 2.0995555555555554,
+      "grad_norm": 4.969343185424805,
+      "learning_rate": 3.2226176354290234e-05,
+      "loss": 0.0099,
+      "step": 1180
+    },
+    {
+      "epoch": 2.1173333333333333,
+      "grad_norm": 1.4300692081451416,
+      "learning_rate": 3.202846975088968e-05,
+      "loss": 0.0181,
+      "step": 1190
+    },
+    {
+      "epoch": 2.135111111111111,
+      "grad_norm": 0.019833851605653763,
+      "learning_rate": 3.183076314748913e-05,
+      "loss": 0.0107,
+      "step": 1200
+    },
+    {
+      "epoch": 2.152888888888889,
+      "grad_norm": 0.01474601961672306,
+      "learning_rate": 3.163305654408857e-05,
+      "loss": 0.0169,
+      "step": 1210
+    },
+    {
+      "epoch": 2.1706666666666665,
+      "grad_norm": 0.004971742630004883,
+      "learning_rate": 3.143534994068802e-05,
+      "loss": 0.0254,
+      "step": 1220
+    },
+    {
+      "epoch": 2.1884444444444444,
+      "grad_norm": 0.42502257227897644,
+      "learning_rate": 3.123764333728747e-05,
+      "loss": 0.0623,
+      "step": 1230
+    },
+    {
+      "epoch": 2.2062222222222223,
+      "grad_norm": 30.118955612182617,
+      "learning_rate": 3.1039936733886916e-05,
+      "loss": 0.0456,
+      "step": 1240
+    },
+    {
+      "epoch": 2.224,
+      "grad_norm": 7.1990132331848145,
+      "learning_rate": 3.0842230130486364e-05,
+      "loss": 0.0468,
+      "step": 1250
+    },
+    {
+      "epoch": 2.2417777777777776,
+      "grad_norm": 0.021625256165862083,
+      "learning_rate": 3.0644523527085805e-05,
+      "loss": 0.0375,
+      "step": 1260
+    },
+    {
+      "epoch": 2.2595555555555555,
+      "grad_norm": 0.08051316440105438,
+      "learning_rate": 3.044681692368525e-05,
+      "loss": 0.0097,
+      "step": 1270
+    },
+    {
+      "epoch": 2.2773333333333334,
+      "grad_norm": 38.26923751831055,
+      "learning_rate": 3.02491103202847e-05,
+      "loss": 0.0247,
+      "step": 1280
+    },
+    {
+      "epoch": 2.295111111111111,
+      "grad_norm": 1.5090163946151733,
+      "learning_rate": 3.0051403716884146e-05,
+      "loss": 0.0207,
+      "step": 1290
+    },
+    {
+      "epoch": 2.3128888888888888,
+      "grad_norm": 14.290279388427734,
+      "learning_rate": 2.985369711348359e-05,
+      "loss": 0.0041,
+      "step": 1300
+    },
+    {
+      "epoch": 2.3306666666666667,
+      "grad_norm": 0.025663571432232857,
+      "learning_rate": 2.9655990510083035e-05,
+      "loss": 0.0013,
+      "step": 1310
+    },
+    {
+      "epoch": 2.3484444444444446,
+      "grad_norm": 13.073821067810059,
+      "learning_rate": 2.9458283906682484e-05,
+      "loss": 0.006,
+      "step": 1320
+    },
+    {
+      "epoch": 2.3662222222222224,
+      "grad_norm": 0.03257250785827637,
+      "learning_rate": 2.9260577303281932e-05,
+      "loss": 0.0383,
+      "step": 1330
+    },
+    {
+      "epoch": 2.384,
+      "grad_norm": 2.0085232257843018,
+      "learning_rate": 2.906287069988138e-05,
+      "loss": 0.0552,
+      "step": 1340
+    },
+    {
+      "epoch": 2.401777777777778,
+      "grad_norm": 19.939329147338867,
+      "learning_rate": 2.8865164096480825e-05,
+      "loss": 0.0714,
+      "step": 1350
+    },
+    {
+      "epoch": 2.4195555555555557,
+      "grad_norm": 0.028250480070710182,
+      "learning_rate": 2.866745749308027e-05,
+      "loss": 0.0227,
+      "step": 1360
+    },
+    {
+      "epoch": 2.437333333333333,
+      "grad_norm": 0.6903110146522522,
+      "learning_rate": 2.846975088967972e-05,
+      "loss": 0.0085,
+      "step": 1370
+    },
+    {
+      "epoch": 2.455111111111111,
+      "grad_norm": 0.22341494262218475,
+      "learning_rate": 2.8272044286279166e-05,
+      "loss": 0.0093,
+      "step": 1380
+    },
+    {
+      "epoch": 2.472888888888889,
+      "grad_norm": 0.015194721519947052,
+      "learning_rate": 2.807433768287861e-05,
+      "loss": 0.0413,
+      "step": 1390
+    },
+    {
+      "epoch": 2.490666666666667,
+      "grad_norm": 0.035975806415081024,
+      "learning_rate": 2.7876631079478055e-05,
+      "loss": 0.0013,
+      "step": 1400
+    },
+    {
+      "epoch": 2.5084444444444447,
+      "grad_norm": 0.32504796981811523,
+      "learning_rate": 2.76789244760775e-05,
+      "loss": 0.0547,
+      "step": 1410
+    },
+    {
+      "epoch": 2.526222222222222,
+      "grad_norm": 0.34928593039512634,
+      "learning_rate": 2.748121787267695e-05,
+      "loss": 0.0556,
+      "step": 1420
+    },
+    {
+      "epoch": 2.544,
+      "grad_norm": 0.045603252947330475,
+      "learning_rate": 2.7283511269276396e-05,
+      "loss": 0.0156,
+      "step": 1430
+    },
+    {
+      "epoch": 2.561777777777778,
+      "grad_norm": 11.350424766540527,
+      "learning_rate": 2.708580466587584e-05,
+      "loss": 0.011,
+      "step": 1440
+    },
+    {
+      "epoch": 2.5795555555555554,
+      "grad_norm": 0.07788264751434326,
+      "learning_rate": 2.6888098062475286e-05,
+      "loss": 0.0607,
+      "step": 1450
+    },
+    {
+      "epoch": 2.5973333333333333,
+      "grad_norm": 0.06617221236228943,
+      "learning_rate": 2.669039145907473e-05,
+      "loss": 0.0102,
+      "step": 1460
+    },
+    {
+      "epoch": 2.615111111111111,
+      "grad_norm": 34.64754867553711,
+      "learning_rate": 2.6492684855674182e-05,
+      "loss": 0.0268,
+      "step": 1470
+    },
+    {
+      "epoch": 2.632888888888889,
+      "grad_norm": 9.72877311706543,
+      "learning_rate": 2.6294978252273626e-05,
+      "loss": 0.0051,
+      "step": 1480
+    },
+    {
+      "epoch": 2.6506666666666665,
+      "grad_norm": 21.619274139404297,
+      "learning_rate": 2.609727164887307e-05,
+      "loss": 0.0092,
+      "step": 1490
+    },
+    {
+      "epoch": 2.6684444444444444,
+      "grad_norm": 4.081634521484375,
+      "learning_rate": 2.589956504547252e-05,
+      "loss": 0.0369,
+      "step": 1500
+    },
+    {
+      "epoch": 2.6862222222222223,
+      "grad_norm": 0.009345272555947304,
+      "learning_rate": 2.5701858442071967e-05,
+      "loss": 0.018,
+      "step": 1510
+    },
+    {
+      "epoch": 2.7039999999999997,
+      "grad_norm": 73.03565979003906,
+      "learning_rate": 2.5504151838671416e-05,
+      "loss": 0.0945,
+      "step": 1520
+    },
+    {
+      "epoch": 2.7217777777777776,
+      "grad_norm": 1.0712828636169434,
+      "learning_rate": 2.530644523527086e-05,
+      "loss": 0.0156,
+      "step": 1530
+    },
+    {
+      "epoch": 2.7395555555555555,
+      "grad_norm": 0.023015221580863,
+      "learning_rate": 2.5108738631870305e-05,
+      "loss": 0.0408,
+      "step": 1540
+    },
+    {
+      "epoch": 2.7573333333333334,
+      "grad_norm": 3.0739543437957764,
+      "learning_rate": 2.4911032028469753e-05,
+      "loss": 0.0384,
+      "step": 1550
+    },
+    {
+      "epoch": 2.7751111111111113,
+      "grad_norm": 0.017695285379886627,
+      "learning_rate": 2.4713325425069198e-05,
+      "loss": 0.034,
+      "step": 1560
+    },
+    {
+      "epoch": 2.7928888888888888,
+      "grad_norm": 0.013055549003183842,
+      "learning_rate": 2.4515618821668646e-05,
+      "loss": 0.074,
+      "step": 1570
+    },
+    {
+      "epoch": 2.8106666666666666,
+      "grad_norm": 6.4839582443237305,
+      "learning_rate": 2.431791221826809e-05,
+      "loss": 0.0016,
+      "step": 1580
+    },
+    {
+      "epoch": 2.8284444444444445,
+      "grad_norm": 0.17747992277145386,
+      "learning_rate": 2.412020561486754e-05,
+      "loss": 0.0579,
+      "step": 1590
+    },
+    {
+      "epoch": 2.846222222222222,
+      "grad_norm": 0.07140109688043594,
+      "learning_rate": 2.3922499011466984e-05,
+      "loss": 0.0303,
+      "step": 1600
+    },
+    {
+      "epoch": 2.864,
+      "grad_norm": 0.0027039784472435713,
+      "learning_rate": 2.3724792408066432e-05,
+      "loss": 0.0337,
+      "step": 1610
+    },
+    {
+      "epoch": 2.8817777777777778,
+      "grad_norm": 0.015552740544080734,
+      "learning_rate": 2.3527085804665877e-05,
+      "loss": 0.0131,
+      "step": 1620
+    },
+    {
+      "epoch": 2.8995555555555557,
+      "grad_norm": 0.014052975922822952,
+      "learning_rate": 2.332937920126532e-05,
+      "loss": 0.0434,
+      "step": 1630
+    },
+    {
+      "epoch": 2.9173333333333336,
+      "grad_norm": 0.7165421843528748,
+      "learning_rate": 2.313167259786477e-05,
+      "loss": 0.0069,
+      "step": 1640
+    },
+    {
+      "epoch": 2.935111111111111,
+      "grad_norm": 0.020382430404424667,
+      "learning_rate": 2.2933965994464214e-05,
+      "loss": 0.0419,
+      "step": 1650
+    },
+    {
+      "epoch": 2.952888888888889,
+      "grad_norm": 0.07127852737903595,
+      "learning_rate": 2.2736259391063662e-05,
+      "loss": 0.0303,
+      "step": 1660
+    },
+    {
+      "epoch": 2.970666666666667,
+      "grad_norm": 2.5529978275299072,
+      "learning_rate": 2.2538552787663107e-05,
+      "loss": 0.0063,
+      "step": 1670
+    },
+    {
+      "epoch": 2.9884444444444442,
+      "grad_norm": 0.0018354392377659678,
+      "learning_rate": 2.2340846184262555e-05,
+      "loss": 0.0323,
+      "step": 1680
+    },
+    {
+      "epoch": 2.999111111111111,
+      "eval_accuracy": 1.0,
+      "eval_loss": 6.3951174524845555e-06,
+      "eval_runtime": 39.8564,
+      "eval_samples_per_second": 200.721,
+      "eval_steps_per_second": 6.273,
+      "step": 1686
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2810,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.364628275991413e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a8175a9ad6281337906169a5a7d648239e46227467b846a8a98a2327fc311cd
+size 5432