Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

checkpoint-6000/config.json +54 -0
checkpoint-6000/model.safetensors +3 -0
checkpoint-6000/optimizer.pt +3 -0
checkpoint-6000/rng_state.pth +3 -0
checkpoint-6000/scaler.pt +3 -0
checkpoint-6000/scheduler.pt +3 -0
checkpoint-6000/trainer_state.json +955 -0
checkpoint-6000/training_args.bin +3 -0
checkpoint-8000/config.json +54 -0
checkpoint-8000/model.safetensors +3 -0
checkpoint-8000/optimizer.pt +3 -0
checkpoint-8000/rng_state.pth +3 -0
checkpoint-8000/scaler.pt +3 -0
checkpoint-8000/scheduler.pt +3 -0
checkpoint-8000/trainer_state.json +1259 -0
checkpoint-8000/training_args.bin +3 -0

checkpoint-6000/config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "architectures": [
+    "LayoutLMv3ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "coordinate_size": 128,
+  "eos_token_id": 2,
+  "has_relative_attention_bias": true,
+  "has_spatial_attention_bias": true,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "form",
+    "1": "invoice",
+    "2": "budget",
+    "3": "file folder",
+    "4": "questionnaire"
+  },
+  "initializer_range": 0.02,
+  "input_size": 224,
+  "intermediate_size": 3072,
+  "label2id": {
+    "budget": 2,
+    "file folder": 3,
+    "form": 0,
+    "invoice": 1,
+    "questionnaire": 4
+  },
+  "layer_norm_eps": 1e-05,
+  "max_2d_position_embeddings": 1024,
+  "max_position_embeddings": 514,
+  "max_rel_2d_pos": 256,
+  "max_rel_pos": 128,
+  "model_type": "layoutlmv3",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "rel_2d_pos_bins": 64,
+  "rel_pos_bins": 32,
+  "second_input_size": 112,
+  "shape_size": 128,
+  "text_embed": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.0",
+  "type_vocab_size": 1,
+  "visual_embed": true,
+  "vocab_size": 50265
+}

checkpoint-6000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:888897351408e17a05ef23c30ace359f70e9ee057f0063ad6bcb40fb5d2848d0
+size 503711980

checkpoint-6000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17e334739af48f1b40f4a607df332d8c1c7625c7eeccf345fc8440f270eabac6
+size 1007534138

checkpoint-6000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8f3367991c558e46e76dedf20f94300572e85c5cdf443ac1c5b98559a94124a
+size 14244

checkpoint-6000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7278d15ce19a0ba738f99c206bd892d3d610b68d3519b0eb32c2dff22e2e790
+size 988

checkpoint-6000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1deaa3298dfd895746f18f11ffb24ed906c98501dd64a733b2f362bb1957a467
+size 1064

checkpoint-6000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,955 @@

+{
+  "best_global_step": 6000,
+  "best_metric": 0.9,
+  "best_model_checkpoint": "./layoutlmv3-docclass-finetuned/checkpoint-6000",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 6000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 27.367076873779297,
+      "learning_rate": 9.954e-06,
+      "loss": 0.7398,
+      "step": 50
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 81.9110336303711,
+      "learning_rate": 9.905000000000001e-06,
+      "loss": 0.5986,
+      "step": 100
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 8.801929473876953,
+      "learning_rate": 9.855000000000001e-06,
+      "loss": 0.5238,
+      "step": 150
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 3.4470932483673096,
+      "learning_rate": 9.805000000000002e-06,
+      "loss": 0.7008,
+      "step": 200
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 66.06997680664062,
+      "learning_rate": 9.755e-06,
+      "loss": 0.5785,
+      "step": 250
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 14.404702186584473,
+      "learning_rate": 9.705000000000001e-06,
+      "loss": 0.644,
+      "step": 300
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 45.596656799316406,
+      "learning_rate": 9.655000000000002e-06,
+      "loss": 0.5607,
+      "step": 350
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 17.859411239624023,
+      "learning_rate": 9.605e-06,
+      "loss": 0.6093,
+      "step": 400
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 72.80252075195312,
+      "learning_rate": 9.555e-06,
+      "loss": 0.7857,
+      "step": 450
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 70.62848663330078,
+      "learning_rate": 9.505000000000001e-06,
+      "loss": 0.5691,
+      "step": 500
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 32.26290512084961,
+      "learning_rate": 9.456e-06,
+      "loss": 0.5647,
+      "step": 550
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 0.2549276351928711,
+      "learning_rate": 9.406000000000001e-06,
+      "loss": 0.6434,
+      "step": 600
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 0.37813302874565125,
+      "learning_rate": 9.356e-06,
+      "loss": 0.6638,
+      "step": 650
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 78.35446166992188,
+      "learning_rate": 9.306e-06,
+      "loss": 0.6705,
+      "step": 700
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 54.32742691040039,
+      "learning_rate": 9.256e-06,
+      "loss": 0.5683,
+      "step": 750
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.2851010262966156,
+      "learning_rate": 9.206000000000001e-06,
+      "loss": 0.5826,
+      "step": 800
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.970667839050293,
+      "learning_rate": 9.156e-06,
+      "loss": 0.5985,
+      "step": 850
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.07515749335289001,
+      "learning_rate": 9.106e-06,
+      "loss": 0.5813,
+      "step": 900
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 26.057180404663086,
+      "learning_rate": 9.056000000000001e-06,
+      "loss": 0.4135,
+      "step": 950
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.34957027435302734,
+      "learning_rate": 9.006e-06,
+      "loss": 0.5918,
+      "step": 1000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.852,
+      "eval_f1": 0.8517868434541516,
+      "eval_loss": 0.6099042892456055,
+      "eval_precision": 0.8540426659530076,
+      "eval_recall": 0.852,
+      "eval_runtime": 109.9388,
+      "eval_samples_per_second": 9.096,
+      "eval_steps_per_second": 2.274,
+      "step": 1000
+    },
+    {
+      "epoch": 1.05,
+      "grad_norm": 37.85305404663086,
+      "learning_rate": 8.956e-06,
+      "loss": 0.4872,
+      "step": 1050
+    },
+    {
+      "epoch": 1.1,
+      "grad_norm": 9.535030364990234,
+      "learning_rate": 8.906e-06,
+      "loss": 0.3874,
+      "step": 1100
+    },
+    {
+      "epoch": 1.15,
+      "grad_norm": 0.1979241967201233,
+      "learning_rate": 8.856000000000001e-06,
+      "loss": 0.5971,
+      "step": 1150
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.5306874513626099,
+      "learning_rate": 8.807e-06,
+      "loss": 0.4709,
+      "step": 1200
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 18.236913681030273,
+      "learning_rate": 8.757000000000001e-06,
+      "loss": 0.472,
+      "step": 1250
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 74.27545166015625,
+      "learning_rate": 8.707000000000002e-06,
+      "loss": 0.4007,
+      "step": 1300
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 6.827157974243164,
+      "learning_rate": 8.657e-06,
+      "loss": 0.4139,
+      "step": 1350
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 0.10050762444734573,
+      "learning_rate": 8.607e-06,
+      "loss": 0.5254,
+      "step": 1400
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 0.05092502385377884,
+      "learning_rate": 8.557000000000001e-06,
+      "loss": 0.359,
+      "step": 1450
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 14.49219036102295,
+      "learning_rate": 8.507e-06,
+      "loss": 0.4157,
+      "step": 1500
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 92.9802017211914,
+      "learning_rate": 8.457e-06,
+      "loss": 0.558,
+      "step": 1550
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 76.93692016601562,
+      "learning_rate": 8.407000000000001e-06,
+      "loss": 0.4579,
+      "step": 1600
+    },
+    {
+      "epoch": 1.65,
+      "grad_norm": 0.22004538774490356,
+      "learning_rate": 8.357000000000001e-06,
+      "loss": 0.6244,
+      "step": 1650
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 0.06912333518266678,
+      "learning_rate": 8.307e-06,
+      "loss": 0.5163,
+      "step": 1700
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 1.3707654476165771,
+      "learning_rate": 8.257e-06,
+      "loss": 0.4181,
+      "step": 1750
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 0.1415724754333496,
+      "learning_rate": 8.207000000000001e-06,
+      "loss": 0.6508,
+      "step": 1800
+    },
+    {
+      "epoch": 1.85,
+      "grad_norm": 21.731523513793945,
+      "learning_rate": 8.157e-06,
+      "loss": 0.4782,
+      "step": 1850
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 0.45240655541419983,
+      "learning_rate": 8.107e-06,
+      "loss": 0.5325,
+      "step": 1900
+    },
+    {
+      "epoch": 1.95,
+      "grad_norm": 9.7157564163208,
+      "learning_rate": 8.057000000000001e-06,
+      "loss": 0.4879,
+      "step": 1950
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 61.20124816894531,
+      "learning_rate": 8.007e-06,
+      "loss": 0.5759,
+      "step": 2000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.871,
+      "eval_f1": 0.8709149644475538,
+      "eval_loss": 0.6178536415100098,
+      "eval_precision": 0.8759130395565256,
+      "eval_recall": 0.871,
+      "eval_runtime": 111.0284,
+      "eval_samples_per_second": 9.007,
+      "eval_steps_per_second": 2.252,
+      "step": 2000
+    },
+    {
+      "epoch": 2.05,
+      "grad_norm": 189.5020751953125,
+      "learning_rate": 7.957e-06,
+      "loss": 0.3242,
+      "step": 2050
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 0.06589367240667343,
+      "learning_rate": 7.907e-06,
+      "loss": 0.3381,
+      "step": 2100
+    },
+    {
+      "epoch": 2.15,
+      "grad_norm": 6.254538536071777,
+      "learning_rate": 7.857e-06,
+      "loss": 0.2586,
+      "step": 2150
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 0.035582080483436584,
+      "learning_rate": 7.807e-06,
+      "loss": 0.3091,
+      "step": 2200
+    },
+    {
+      "epoch": 2.25,
+      "grad_norm": 0.17389701306819916,
+      "learning_rate": 7.757e-06,
+      "loss": 0.5237,
+      "step": 2250
+    },
+    {
+      "epoch": 2.3,
+      "grad_norm": 33.96052169799805,
+      "learning_rate": 7.707000000000001e-06,
+      "loss": 0.4895,
+      "step": 2300
+    },
+    {
+      "epoch": 2.35,
+      "grad_norm": 0.2390211671590805,
+      "learning_rate": 7.657000000000001e-06,
+      "loss": 0.3044,
+      "step": 2350
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.29711782932281494,
+      "learning_rate": 7.607000000000001e-06,
+      "loss": 0.3455,
+      "step": 2400
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.026691796258091927,
+      "learning_rate": 7.557000000000001e-06,
+      "loss": 0.4515,
+      "step": 2450
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.12731046974658966,
+      "learning_rate": 7.507000000000001e-06,
+      "loss": 0.3598,
+      "step": 2500
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 0.042913321405649185,
+      "learning_rate": 7.457000000000001e-06,
+      "loss": 0.2815,
+      "step": 2550
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 0.06428048014640808,
+      "learning_rate": 7.407000000000001e-06,
+      "loss": 0.3822,
+      "step": 2600
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 0.22337721288204193,
+      "learning_rate": 7.357000000000001e-06,
+      "loss": 0.217,
+      "step": 2650
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 0.5337192416191101,
+      "learning_rate": 7.3070000000000005e-06,
+      "loss": 0.4874,
+      "step": 2700
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 0.03206096589565277,
+      "learning_rate": 7.257000000000001e-06,
+      "loss": 0.4564,
+      "step": 2750
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 346.4653625488281,
+      "learning_rate": 7.207000000000001e-06,
+      "loss": 0.2794,
+      "step": 2800
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 39.87053298950195,
+      "learning_rate": 7.157e-06,
+      "loss": 0.4392,
+      "step": 2850
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 0.0478862039744854,
+      "learning_rate": 7.107000000000001e-06,
+      "loss": 0.2627,
+      "step": 2900
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 0.04880313202738762,
+      "learning_rate": 7.057e-06,
+      "loss": 0.1502,
+      "step": 2950
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.8579753041267395,
+      "learning_rate": 7.007000000000001e-06,
+      "loss": 0.3137,
+      "step": 3000
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.858,
+      "eval_f1": 0.8568902628142059,
+      "eval_loss": 0.8766486048698425,
+      "eval_precision": 0.869630225735872,
+      "eval_recall": 0.858,
+      "eval_runtime": 112.5121,
+      "eval_samples_per_second": 8.888,
+      "eval_steps_per_second": 2.222,
+      "step": 3000
+    },
+    {
+      "epoch": 3.05,
+      "grad_norm": 0.09161412715911865,
+      "learning_rate": 6.9570000000000005e-06,
+      "loss": 0.2441,
+      "step": 3050
+    },
+    {
+      "epoch": 3.1,
+      "grad_norm": 285.05072021484375,
+      "learning_rate": 6.907e-06,
+      "loss": 0.4021,
+      "step": 3100
+    },
+    {
+      "epoch": 3.15,
+      "grad_norm": 0.03876444697380066,
+      "learning_rate": 6.857000000000001e-06,
+      "loss": 0.2032,
+      "step": 3150
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 9.66369342803955,
+      "learning_rate": 6.807e-06,
+      "loss": 0.1972,
+      "step": 3200
+    },
+    {
+      "epoch": 3.25,
+      "grad_norm": 0.06236199662089348,
+      "learning_rate": 6.757e-06,
+      "loss": 0.4157,
+      "step": 3250
+    },
+    {
+      "epoch": 3.3,
+      "grad_norm": 2.1778385639190674,
+      "learning_rate": 6.707e-06,
+      "loss": 0.3814,
+      "step": 3300
+    },
+    {
+      "epoch": 3.35,
+      "grad_norm": 0.02144041284918785,
+      "learning_rate": 6.657e-06,
+      "loss": 0.1562,
+      "step": 3350
+    },
+    {
+      "epoch": 3.4,
+      "grad_norm": 0.4679206311702728,
+      "learning_rate": 6.6070000000000004e-06,
+      "loss": 0.2698,
+      "step": 3400
+    },
+    {
+      "epoch": 3.45,
+      "grad_norm": 0.019845549017190933,
+      "learning_rate": 6.557e-06,
+      "loss": 0.2024,
+      "step": 3450
+    },
+    {
+      "epoch": 3.5,
+      "grad_norm": 0.03582359477877617,
+      "learning_rate": 6.507e-06,
+      "loss": 0.1805,
+      "step": 3500
+    },
+    {
+      "epoch": 3.55,
+      "grad_norm": 0.012999696657061577,
+      "learning_rate": 6.457000000000001e-06,
+      "loss": 0.2242,
+      "step": 3550
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 61.022212982177734,
+      "learning_rate": 6.407000000000001e-06,
+      "loss": 0.2972,
+      "step": 3600
+    },
+    {
+      "epoch": 3.65,
+      "grad_norm": 0.17647244036197662,
+      "learning_rate": 6.357000000000001e-06,
+      "loss": 0.2507,
+      "step": 3650
+    },
+    {
+      "epoch": 3.7,
+      "grad_norm": 0.02148498222231865,
+      "learning_rate": 6.307000000000001e-06,
+      "loss": 0.3322,
+      "step": 3700
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 8.956069946289062,
+      "learning_rate": 6.257000000000001e-06,
+      "loss": 0.2174,
+      "step": 3750
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 0.019011685624718666,
+      "learning_rate": 6.2080000000000005e-06,
+      "loss": 0.2118,
+      "step": 3800
+    },
+    {
+      "epoch": 3.85,
+      "grad_norm": 0.01807810179889202,
+      "learning_rate": 6.158000000000001e-06,
+      "loss": 0.1908,
+      "step": 3850
+    },
+    {
+      "epoch": 3.9,
+      "grad_norm": 0.02416962757706642,
+      "learning_rate": 6.108000000000001e-06,
+      "loss": 0.1491,
+      "step": 3900
+    },
+    {
+      "epoch": 3.95,
+      "grad_norm": 19.60466766357422,
+      "learning_rate": 6.058e-06,
+      "loss": 0.2031,
+      "step": 3950
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.21647094190120697,
+      "learning_rate": 6.008000000000001e-06,
+      "loss": 0.2952,
+      "step": 4000
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.873,
+      "eval_f1": 0.872977072862821,
+      "eval_loss": 0.821958065032959,
+      "eval_precision": 0.8834745892873105,
+      "eval_recall": 0.873,
+      "eval_runtime": 112.9225,
+      "eval_samples_per_second": 8.856,
+      "eval_steps_per_second": 2.214,
+      "step": 4000
+    },
+    {
+      "epoch": 4.05,
+      "grad_norm": 0.08491307497024536,
+      "learning_rate": 5.958e-06,
+      "loss": 0.1665,
+      "step": 4050
+    },
+    {
+      "epoch": 4.1,
+      "grad_norm": 0.10018359869718552,
+      "learning_rate": 5.908e-06,
+      "loss": 0.2381,
+      "step": 4100
+    },
+    {
+      "epoch": 4.15,
+      "grad_norm": 0.01517445407807827,
+      "learning_rate": 5.8580000000000005e-06,
+      "loss": 0.1407,
+      "step": 4150
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.2072858363389969,
+      "learning_rate": 5.808e-06,
+      "loss": 0.2508,
+      "step": 4200
+    },
+    {
+      "epoch": 4.25,
+      "grad_norm": 76.36699676513672,
+      "learning_rate": 5.758000000000001e-06,
+      "loss": 0.2175,
+      "step": 4250
+    },
+    {
+      "epoch": 4.3,
+      "grad_norm": 0.0169328935444355,
+      "learning_rate": 5.708e-06,
+      "loss": 0.1878,
+      "step": 4300
+    },
+    {
+      "epoch": 4.35,
+      "grad_norm": 23.842693328857422,
+      "learning_rate": 5.658e-06,
+      "loss": 0.1266,
+      "step": 4350
+    },
+    {
+      "epoch": 4.4,
+      "grad_norm": 233.6339111328125,
+      "learning_rate": 5.608e-06,
+      "loss": 0.1279,
+      "step": 4400
+    },
+    {
+      "epoch": 4.45,
+      "grad_norm": 173.3831024169922,
+      "learning_rate": 5.558e-06,
+      "loss": 0.1829,
+      "step": 4450
+    },
+    {
+      "epoch": 4.5,
+      "grad_norm": 0.08392331004142761,
+      "learning_rate": 5.508e-06,
+      "loss": 0.1668,
+      "step": 4500
+    },
+    {
+      "epoch": 4.55,
+      "grad_norm": 0.020959168672561646,
+      "learning_rate": 5.458e-06,
+      "loss": 0.0815,
+      "step": 4550
+    },
+    {
+      "epoch": 4.6,
+      "grad_norm": 225.8976593017578,
+      "learning_rate": 5.408e-06,
+      "loss": 0.2346,
+      "step": 4600
+    },
+    {
+      "epoch": 4.65,
+      "grad_norm": 358.5092468261719,
+      "learning_rate": 5.358000000000001e-06,
+      "loss": 0.1021,
+      "step": 4650
+    },
+    {
+      "epoch": 4.7,
+      "grad_norm": 165.55499267578125,
+      "learning_rate": 5.308000000000001e-06,
+      "loss": 0.2577,
+      "step": 4700
+    },
+    {
+      "epoch": 4.75,
+      "grad_norm": 0.11646530777215958,
+      "learning_rate": 5.258000000000001e-06,
+      "loss": 0.2029,
+      "step": 4750
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 0.03793807700276375,
+      "learning_rate": 5.208000000000001e-06,
+      "loss": 0.1465,
+      "step": 4800
+    },
+    {
+      "epoch": 4.85,
+      "grad_norm": 113.57576751708984,
+      "learning_rate": 5.158e-06,
+      "loss": 0.1906,
+      "step": 4850
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.011114823631942272,
+      "learning_rate": 5.108000000000001e-06,
+      "loss": 0.1876,
+      "step": 4900
+    },
+    {
+      "epoch": 4.95,
+      "grad_norm": 77.72245025634766,
+      "learning_rate": 5.0580000000000005e-06,
+      "loss": 0.316,
+      "step": 4950
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.02318619005382061,
+      "learning_rate": 5.008000000000001e-06,
+      "loss": 0.1035,
+      "step": 5000
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.884,
+      "eval_f1": 0.8836532680104157,
+      "eval_loss": 0.7644630074501038,
+      "eval_precision": 0.8889154581905352,
+      "eval_recall": 0.884,
+      "eval_runtime": 111.6762,
+      "eval_samples_per_second": 8.954,
+      "eval_steps_per_second": 2.239,
+      "step": 5000
+    },
+    {
+      "epoch": 5.05,
+      "grad_norm": 0.03648075461387634,
+      "learning_rate": 4.958000000000001e-06,
+      "loss": 0.0445,
+      "step": 5050
+    },
+    {
+      "epoch": 5.1,
+      "grad_norm": 0.014485549181699753,
+      "learning_rate": 4.908e-06,
+      "loss": 0.0928,
+      "step": 5100
+    },
+    {
+      "epoch": 5.15,
+      "grad_norm": 0.019193926826119423,
+      "learning_rate": 4.858000000000001e-06,
+      "loss": 0.1156,
+      "step": 5150
+    },
+    {
+      "epoch": 5.2,
+      "grad_norm": 0.007525489665567875,
+      "learning_rate": 4.808e-06,
+      "loss": 0.1103,
+      "step": 5200
+    },
+    {
+      "epoch": 5.25,
+      "grad_norm": 0.02716565877199173,
+      "learning_rate": 4.758e-06,
+      "loss": 0.1653,
+      "step": 5250
+    },
+    {
+      "epoch": 5.3,
+      "grad_norm": 0.02299477905035019,
+      "learning_rate": 4.7080000000000005e-06,
+      "loss": 0.1915,
+      "step": 5300
+    },
+    {
+      "epoch": 5.35,
+      "grad_norm": 447.5741271972656,
+      "learning_rate": 4.658e-06,
+      "loss": 0.0903,
+      "step": 5350
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 405.6177062988281,
+      "learning_rate": 4.608000000000001e-06,
+      "loss": 0.1506,
+      "step": 5400
+    },
+    {
+      "epoch": 5.45,
+      "grad_norm": 6.23228645324707,
+      "learning_rate": 4.558e-06,
+      "loss": 0.1342,
+      "step": 5450
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 0.047868575900793076,
+      "learning_rate": 4.508e-06,
+      "loss": 0.1799,
+      "step": 5500
+    },
+    {
+      "epoch": 5.55,
+      "grad_norm": 115.90292358398438,
+      "learning_rate": 4.458e-06,
+      "loss": 0.0751,
+      "step": 5550
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.1427452564239502,
+      "learning_rate": 4.408000000000001e-06,
+      "loss": 0.1132,
+      "step": 5600
+    },
+    {
+      "epoch": 5.65,
+      "grad_norm": 0.03756200522184372,
+      "learning_rate": 4.3580000000000005e-06,
+      "loss": 0.0219,
+      "step": 5650
+    },
+    {
+      "epoch": 5.7,
+      "grad_norm": 0.1948777437210083,
+      "learning_rate": 4.308000000000001e-06,
+      "loss": 0.1625,
+      "step": 5700
+    },
+    {
+      "epoch": 5.75,
+      "grad_norm": 0.14967912435531616,
+      "learning_rate": 4.2580000000000006e-06,
+      "loss": 0.1131,
+      "step": 5750
+    },
+    {
+      "epoch": 5.8,
+      "grad_norm": 2.684386730194092,
+      "learning_rate": 4.208e-06,
+      "loss": 0.1085,
+      "step": 5800
+    },
+    {
+      "epoch": 5.85,
+      "grad_norm": 0.037221621721982956,
+      "learning_rate": 4.158000000000001e-06,
+      "loss": 0.1957,
+      "step": 5850
+    },
+    {
+      "epoch": 5.9,
+      "grad_norm": 0.014206652529537678,
+      "learning_rate": 4.108e-06,
+      "loss": 0.0228,
+      "step": 5900
+    },
+    {
+      "epoch": 5.95,
+      "grad_norm": 0.008729885332286358,
+      "learning_rate": 4.058e-06,
+      "loss": 0.0421,
+      "step": 5950
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 253.62225341796875,
+      "learning_rate": 4.008e-06,
+      "loss": 0.0865,
+      "step": 6000
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.9,
+      "eval_f1": 0.9000352608410468,
+      "eval_loss": 0.7469241619110107,
+      "eval_precision": 0.9014304934175754,
+      "eval_recall": 0.9,
+      "eval_runtime": 112.6162,
+      "eval_samples_per_second": 8.88,
+      "eval_steps_per_second": 2.22,
+      "step": 6000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 10000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 2,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6369958010880000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-6000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f403144661d43c8a35a0a64782c635300fe17c4e900c1c22a3d8255e6ad2018
+size 5304

checkpoint-8000/config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "architectures": [
+    "LayoutLMv3ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "coordinate_size": 128,
+  "eos_token_id": 2,
+  "has_relative_attention_bias": true,
+  "has_spatial_attention_bias": true,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "form",
+    "1": "invoice",
+    "2": "budget",
+    "3": "file folder",
+    "4": "questionnaire"
+  },
+  "initializer_range": 0.02,
+  "input_size": 224,
+  "intermediate_size": 3072,
+  "label2id": {
+    "budget": 2,
+    "file folder": 3,
+    "form": 0,
+    "invoice": 1,
+    "questionnaire": 4
+  },
+  "layer_norm_eps": 1e-05,
+  "max_2d_position_embeddings": 1024,
+  "max_position_embeddings": 514,
+  "max_rel_2d_pos": 256,
+  "max_rel_pos": 128,
+  "model_type": "layoutlmv3",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "rel_2d_pos_bins": 64,
+  "rel_pos_bins": 32,
+  "second_input_size": 112,
+  "shape_size": 128,
+  "text_embed": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.0",
+  "type_vocab_size": 1,
+  "visual_embed": true,
+  "vocab_size": 50265
+}

checkpoint-8000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec6e1c6a5e7d8604e909f32bfbaeb0e40ba98946c5f5420cea3b0f4fca9df204
+size 503711980

checkpoint-8000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d4694725821e140611ef97aa16f4ad915981e95f0aece165acbcfe28922267c
+size 1007534138

checkpoint-8000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:15f1326fd11903c8d72d967d58b235578cd2563695ef229f25c2022ef6a2432a
+size 14244

checkpoint-8000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95c585bdc83f0924a7abd8d4c3964b0325ee0eff5a0c7c32476a6491bf0cccce
+size 988

checkpoint-8000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd7aeb1ad1841b89f23b8cc54996197bdfcfbe9886c62934aac69a735621208c
+size 1064

checkpoint-8000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1259 @@

+{
+  "best_global_step": 6000,
+  "best_metric": 0.9,
+  "best_model_checkpoint": "./layoutlmv3-docclass-finetuned/checkpoint-6000",
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 8000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 27.367076873779297,
+      "learning_rate": 9.954e-06,
+      "loss": 0.7398,
+      "step": 50
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 81.9110336303711,
+      "learning_rate": 9.905000000000001e-06,
+      "loss": 0.5986,
+      "step": 100
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 8.801929473876953,
+      "learning_rate": 9.855000000000001e-06,
+      "loss": 0.5238,
+      "step": 150
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 3.4470932483673096,
+      "learning_rate": 9.805000000000002e-06,
+      "loss": 0.7008,
+      "step": 200
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 66.06997680664062,
+      "learning_rate": 9.755e-06,
+      "loss": 0.5785,
+      "step": 250
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 14.404702186584473,
+      "learning_rate": 9.705000000000001e-06,
+      "loss": 0.644,
+      "step": 300
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 45.596656799316406,
+      "learning_rate": 9.655000000000002e-06,
+      "loss": 0.5607,
+      "step": 350
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 17.859411239624023,
+      "learning_rate": 9.605e-06,
+      "loss": 0.6093,
+      "step": 400
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 72.80252075195312,
+      "learning_rate": 9.555e-06,
+      "loss": 0.7857,
+      "step": 450
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 70.62848663330078,
+      "learning_rate": 9.505000000000001e-06,
+      "loss": 0.5691,
+      "step": 500
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 32.26290512084961,
+      "learning_rate": 9.456e-06,
+      "loss": 0.5647,
+      "step": 550
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 0.2549276351928711,
+      "learning_rate": 9.406000000000001e-06,
+      "loss": 0.6434,
+      "step": 600
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 0.37813302874565125,
+      "learning_rate": 9.356e-06,
+      "loss": 0.6638,
+      "step": 650
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 78.35446166992188,
+      "learning_rate": 9.306e-06,
+      "loss": 0.6705,
+      "step": 700
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 54.32742691040039,
+      "learning_rate": 9.256e-06,
+      "loss": 0.5683,
+      "step": 750
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.2851010262966156,
+      "learning_rate": 9.206000000000001e-06,
+      "loss": 0.5826,
+      "step": 800
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.970667839050293,
+      "learning_rate": 9.156e-06,
+      "loss": 0.5985,
+      "step": 850
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.07515749335289001,
+      "learning_rate": 9.106e-06,
+      "loss": 0.5813,
+      "step": 900
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 26.057180404663086,
+      "learning_rate": 9.056000000000001e-06,
+      "loss": 0.4135,
+      "step": 950
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.34957027435302734,
+      "learning_rate": 9.006e-06,
+      "loss": 0.5918,
+      "step": 1000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.852,
+      "eval_f1": 0.8517868434541516,
+      "eval_loss": 0.6099042892456055,
+      "eval_precision": 0.8540426659530076,
+      "eval_recall": 0.852,
+      "eval_runtime": 109.9388,
+      "eval_samples_per_second": 9.096,
+      "eval_steps_per_second": 2.274,
+      "step": 1000
+    },
+    {
+      "epoch": 1.05,
+      "grad_norm": 37.85305404663086,
+      "learning_rate": 8.956e-06,
+      "loss": 0.4872,
+      "step": 1050
+    },
+    {
+      "epoch": 1.1,
+      "grad_norm": 9.535030364990234,
+      "learning_rate": 8.906e-06,
+      "loss": 0.3874,
+      "step": 1100
+    },
+    {
+      "epoch": 1.15,
+      "grad_norm": 0.1979241967201233,
+      "learning_rate": 8.856000000000001e-06,
+      "loss": 0.5971,
+      "step": 1150
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.5306874513626099,
+      "learning_rate": 8.807e-06,
+      "loss": 0.4709,
+      "step": 1200
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 18.236913681030273,
+      "learning_rate": 8.757000000000001e-06,
+      "loss": 0.472,
+      "step": 1250
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 74.27545166015625,
+      "learning_rate": 8.707000000000002e-06,
+      "loss": 0.4007,
+      "step": 1300
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 6.827157974243164,
+      "learning_rate": 8.657e-06,
+      "loss": 0.4139,
+      "step": 1350
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 0.10050762444734573,
+      "learning_rate": 8.607e-06,
+      "loss": 0.5254,
+      "step": 1400
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 0.05092502385377884,
+      "learning_rate": 8.557000000000001e-06,
+      "loss": 0.359,
+      "step": 1450
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 14.49219036102295,
+      "learning_rate": 8.507e-06,
+      "loss": 0.4157,
+      "step": 1500
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 92.9802017211914,
+      "learning_rate": 8.457e-06,
+      "loss": 0.558,
+      "step": 1550
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 76.93692016601562,
+      "learning_rate": 8.407000000000001e-06,
+      "loss": 0.4579,
+      "step": 1600
+    },
+    {
+      "epoch": 1.65,
+      "grad_norm": 0.22004538774490356,
+      "learning_rate": 8.357000000000001e-06,
+      "loss": 0.6244,
+      "step": 1650
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 0.06912333518266678,
+      "learning_rate": 8.307e-06,
+      "loss": 0.5163,
+      "step": 1700
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 1.3707654476165771,
+      "learning_rate": 8.257e-06,
+      "loss": 0.4181,
+      "step": 1750
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 0.1415724754333496,
+      "learning_rate": 8.207000000000001e-06,
+      "loss": 0.6508,
+      "step": 1800
+    },
+    {
+      "epoch": 1.85,
+      "grad_norm": 21.731523513793945,
+      "learning_rate": 8.157e-06,
+      "loss": 0.4782,
+      "step": 1850
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 0.45240655541419983,
+      "learning_rate": 8.107e-06,
+      "loss": 0.5325,
+      "step": 1900
+    },
+    {
+      "epoch": 1.95,
+      "grad_norm": 9.7157564163208,
+      "learning_rate": 8.057000000000001e-06,
+      "loss": 0.4879,
+      "step": 1950
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 61.20124816894531,
+      "learning_rate": 8.007e-06,
+      "loss": 0.5759,
+      "step": 2000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.871,
+      "eval_f1": 0.8709149644475538,
+      "eval_loss": 0.6178536415100098,
+      "eval_precision": 0.8759130395565256,
+      "eval_recall": 0.871,
+      "eval_runtime": 111.0284,
+      "eval_samples_per_second": 9.007,
+      "eval_steps_per_second": 2.252,
+      "step": 2000
+    },
+    {
+      "epoch": 2.05,
+      "grad_norm": 189.5020751953125,
+      "learning_rate": 7.957e-06,
+      "loss": 0.3242,
+      "step": 2050
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 0.06589367240667343,
+      "learning_rate": 7.907e-06,
+      "loss": 0.3381,
+      "step": 2100
+    },
+    {
+      "epoch": 2.15,
+      "grad_norm": 6.254538536071777,
+      "learning_rate": 7.857e-06,
+      "loss": 0.2586,
+      "step": 2150
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 0.035582080483436584,
+      "learning_rate": 7.807e-06,
+      "loss": 0.3091,
+      "step": 2200
+    },
+    {
+      "epoch": 2.25,
+      "grad_norm": 0.17389701306819916,
+      "learning_rate": 7.757e-06,
+      "loss": 0.5237,
+      "step": 2250
+    },
+    {
+      "epoch": 2.3,
+      "grad_norm": 33.96052169799805,
+      "learning_rate": 7.707000000000001e-06,
+      "loss": 0.4895,
+      "step": 2300
+    },
+    {
+      "epoch": 2.35,
+      "grad_norm": 0.2390211671590805,
+      "learning_rate": 7.657000000000001e-06,
+      "loss": 0.3044,
+      "step": 2350
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.29711782932281494,
+      "learning_rate": 7.607000000000001e-06,
+      "loss": 0.3455,
+      "step": 2400
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.026691796258091927,
+      "learning_rate": 7.557000000000001e-06,
+      "loss": 0.4515,
+      "step": 2450
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.12731046974658966,
+      "learning_rate": 7.507000000000001e-06,
+      "loss": 0.3598,
+      "step": 2500
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 0.042913321405649185,
+      "learning_rate": 7.457000000000001e-06,
+      "loss": 0.2815,
+      "step": 2550
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 0.06428048014640808,
+      "learning_rate": 7.407000000000001e-06,
+      "loss": 0.3822,
+      "step": 2600
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 0.22337721288204193,
+      "learning_rate": 7.357000000000001e-06,
+      "loss": 0.217,
+      "step": 2650
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 0.5337192416191101,
+      "learning_rate": 7.3070000000000005e-06,
+      "loss": 0.4874,
+      "step": 2700
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 0.03206096589565277,
+      "learning_rate": 7.257000000000001e-06,
+      "loss": 0.4564,
+      "step": 2750
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 346.4653625488281,
+      "learning_rate": 7.207000000000001e-06,
+      "loss": 0.2794,
+      "step": 2800
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 39.87053298950195,
+      "learning_rate": 7.157e-06,
+      "loss": 0.4392,
+      "step": 2850
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 0.0478862039744854,
+      "learning_rate": 7.107000000000001e-06,
+      "loss": 0.2627,
+      "step": 2900
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 0.04880313202738762,
+      "learning_rate": 7.057e-06,
+      "loss": 0.1502,
+      "step": 2950
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.8579753041267395,
+      "learning_rate": 7.007000000000001e-06,
+      "loss": 0.3137,
+      "step": 3000
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.858,
+      "eval_f1": 0.8568902628142059,
+      "eval_loss": 0.8766486048698425,
+      "eval_precision": 0.869630225735872,
+      "eval_recall": 0.858,
+      "eval_runtime": 112.5121,
+      "eval_samples_per_second": 8.888,
+      "eval_steps_per_second": 2.222,
+      "step": 3000
+    },
+    {
+      "epoch": 3.05,
+      "grad_norm": 0.09161412715911865,
+      "learning_rate": 6.9570000000000005e-06,
+      "loss": 0.2441,
+      "step": 3050
+    },
+    {
+      "epoch": 3.1,
+      "grad_norm": 285.05072021484375,
+      "learning_rate": 6.907e-06,
+      "loss": 0.4021,
+      "step": 3100
+    },
+    {
+      "epoch": 3.15,
+      "grad_norm": 0.03876444697380066,
+      "learning_rate": 6.857000000000001e-06,
+      "loss": 0.2032,
+      "step": 3150
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 9.66369342803955,
+      "learning_rate": 6.807e-06,
+      "loss": 0.1972,
+      "step": 3200
+    },
+    {
+      "epoch": 3.25,
+      "grad_norm": 0.06236199662089348,
+      "learning_rate": 6.757e-06,
+      "loss": 0.4157,
+      "step": 3250
+    },
+    {
+      "epoch": 3.3,
+      "grad_norm": 2.1778385639190674,
+      "learning_rate": 6.707e-06,
+      "loss": 0.3814,
+      "step": 3300
+    },
+    {
+      "epoch": 3.35,
+      "grad_norm": 0.02144041284918785,
+      "learning_rate": 6.657e-06,
+      "loss": 0.1562,
+      "step": 3350
+    },
+    {
+      "epoch": 3.4,
+      "grad_norm": 0.4679206311702728,
+      "learning_rate": 6.6070000000000004e-06,
+      "loss": 0.2698,
+      "step": 3400
+    },
+    {
+      "epoch": 3.45,
+      "grad_norm": 0.019845549017190933,
+      "learning_rate": 6.557e-06,
+      "loss": 0.2024,
+      "step": 3450
+    },
+    {
+      "epoch": 3.5,
+      "grad_norm": 0.03582359477877617,
+      "learning_rate": 6.507e-06,
+      "loss": 0.1805,
+      "step": 3500
+    },
+    {
+      "epoch": 3.55,
+      "grad_norm": 0.012999696657061577,
+      "learning_rate": 6.457000000000001e-06,
+      "loss": 0.2242,
+      "step": 3550
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 61.022212982177734,
+      "learning_rate": 6.407000000000001e-06,
+      "loss": 0.2972,
+      "step": 3600
+    },
+    {
+      "epoch": 3.65,
+      "grad_norm": 0.17647244036197662,
+      "learning_rate": 6.357000000000001e-06,
+      "loss": 0.2507,
+      "step": 3650
+    },
+    {
+      "epoch": 3.7,
+      "grad_norm": 0.02148498222231865,
+      "learning_rate": 6.307000000000001e-06,
+      "loss": 0.3322,
+      "step": 3700
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 8.956069946289062,
+      "learning_rate": 6.257000000000001e-06,
+      "loss": 0.2174,
+      "step": 3750
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 0.019011685624718666,
+      "learning_rate": 6.2080000000000005e-06,
+      "loss": 0.2118,
+      "step": 3800
+    },
+    {
+      "epoch": 3.85,
+      "grad_norm": 0.01807810179889202,
+      "learning_rate": 6.158000000000001e-06,
+      "loss": 0.1908,
+      "step": 3850
+    },
+    {
+      "epoch": 3.9,
+      "grad_norm": 0.02416962757706642,
+      "learning_rate": 6.108000000000001e-06,
+      "loss": 0.1491,
+      "step": 3900
+    },
+    {
+      "epoch": 3.95,
+      "grad_norm": 19.60466766357422,
+      "learning_rate": 6.058e-06,
+      "loss": 0.2031,
+      "step": 3950
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.21647094190120697,
+      "learning_rate": 6.008000000000001e-06,
+      "loss": 0.2952,
+      "step": 4000
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.873,
+      "eval_f1": 0.872977072862821,
+      "eval_loss": 0.821958065032959,
+      "eval_precision": 0.8834745892873105,
+      "eval_recall": 0.873,
+      "eval_runtime": 112.9225,
+      "eval_samples_per_second": 8.856,
+      "eval_steps_per_second": 2.214,
+      "step": 4000
+    },
+    {
+      "epoch": 4.05,
+      "grad_norm": 0.08491307497024536,
+      "learning_rate": 5.958e-06,
+      "loss": 0.1665,
+      "step": 4050
+    },
+    {
+      "epoch": 4.1,
+      "grad_norm": 0.10018359869718552,
+      "learning_rate": 5.908e-06,
+      "loss": 0.2381,
+      "step": 4100
+    },
+    {
+      "epoch": 4.15,
+      "grad_norm": 0.01517445407807827,
+      "learning_rate": 5.8580000000000005e-06,
+      "loss": 0.1407,
+      "step": 4150
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.2072858363389969,
+      "learning_rate": 5.808e-06,
+      "loss": 0.2508,
+      "step": 4200
+    },
+    {
+      "epoch": 4.25,
+      "grad_norm": 76.36699676513672,
+      "learning_rate": 5.758000000000001e-06,
+      "loss": 0.2175,
+      "step": 4250
+    },
+    {
+      "epoch": 4.3,
+      "grad_norm": 0.0169328935444355,
+      "learning_rate": 5.708e-06,
+      "loss": 0.1878,
+      "step": 4300
+    },
+    {
+      "epoch": 4.35,
+      "grad_norm": 23.842693328857422,
+      "learning_rate": 5.658e-06,
+      "loss": 0.1266,
+      "step": 4350
+    },
+    {
+      "epoch": 4.4,
+      "grad_norm": 233.6339111328125,
+      "learning_rate": 5.608e-06,
+      "loss": 0.1279,
+      "step": 4400
+    },
+    {
+      "epoch": 4.45,
+      "grad_norm": 173.3831024169922,
+      "learning_rate": 5.558e-06,
+      "loss": 0.1829,
+      "step": 4450
+    },
+    {
+      "epoch": 4.5,
+      "grad_norm": 0.08392331004142761,
+      "learning_rate": 5.508e-06,
+      "loss": 0.1668,
+      "step": 4500
+    },
+    {
+      "epoch": 4.55,
+      "grad_norm": 0.020959168672561646,
+      "learning_rate": 5.458e-06,
+      "loss": 0.0815,
+      "step": 4550
+    },
+    {
+      "epoch": 4.6,
+      "grad_norm": 225.8976593017578,
+      "learning_rate": 5.408e-06,
+      "loss": 0.2346,
+      "step": 4600
+    },
+    {
+      "epoch": 4.65,
+      "grad_norm": 358.5092468261719,
+      "learning_rate": 5.358000000000001e-06,
+      "loss": 0.1021,
+      "step": 4650
+    },
+    {
+      "epoch": 4.7,
+      "grad_norm": 165.55499267578125,
+      "learning_rate": 5.308000000000001e-06,
+      "loss": 0.2577,
+      "step": 4700
+    },
+    {
+      "epoch": 4.75,
+      "grad_norm": 0.11646530777215958,
+      "learning_rate": 5.258000000000001e-06,
+      "loss": 0.2029,
+      "step": 4750
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 0.03793807700276375,
+      "learning_rate": 5.208000000000001e-06,
+      "loss": 0.1465,
+      "step": 4800
+    },
+    {
+      "epoch": 4.85,
+      "grad_norm": 113.57576751708984,
+      "learning_rate": 5.158e-06,
+      "loss": 0.1906,
+      "step": 4850
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.011114823631942272,
+      "learning_rate": 5.108000000000001e-06,
+      "loss": 0.1876,
+      "step": 4900
+    },
+    {
+      "epoch": 4.95,
+      "grad_norm": 77.72245025634766,
+      "learning_rate": 5.0580000000000005e-06,
+      "loss": 0.316,
+      "step": 4950
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.02318619005382061,
+      "learning_rate": 5.008000000000001e-06,
+      "loss": 0.1035,
+      "step": 5000
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.884,
+      "eval_f1": 0.8836532680104157,
+      "eval_loss": 0.7644630074501038,
+      "eval_precision": 0.8889154581905352,
+      "eval_recall": 0.884,
+      "eval_runtime": 111.6762,
+      "eval_samples_per_second": 8.954,
+      "eval_steps_per_second": 2.239,
+      "step": 5000
+    },
+    {
+      "epoch": 5.05,
+      "grad_norm": 0.03648075461387634,
+      "learning_rate": 4.958000000000001e-06,
+      "loss": 0.0445,
+      "step": 5050
+    },
+    {
+      "epoch": 5.1,
+      "grad_norm": 0.014485549181699753,
+      "learning_rate": 4.908e-06,
+      "loss": 0.0928,
+      "step": 5100
+    },
+    {
+      "epoch": 5.15,
+      "grad_norm": 0.019193926826119423,
+      "learning_rate": 4.858000000000001e-06,
+      "loss": 0.1156,
+      "step": 5150
+    },
+    {
+      "epoch": 5.2,
+      "grad_norm": 0.007525489665567875,
+      "learning_rate": 4.808e-06,
+      "loss": 0.1103,
+      "step": 5200
+    },
+    {
+      "epoch": 5.25,
+      "grad_norm": 0.02716565877199173,
+      "learning_rate": 4.758e-06,
+      "loss": 0.1653,
+      "step": 5250
+    },
+    {
+      "epoch": 5.3,
+      "grad_norm": 0.02299477905035019,
+      "learning_rate": 4.7080000000000005e-06,
+      "loss": 0.1915,
+      "step": 5300
+    },
+    {
+      "epoch": 5.35,
+      "grad_norm": 447.5741271972656,
+      "learning_rate": 4.658e-06,
+      "loss": 0.0903,
+      "step": 5350
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 405.6177062988281,
+      "learning_rate": 4.608000000000001e-06,
+      "loss": 0.1506,
+      "step": 5400
+    },
+    {
+      "epoch": 5.45,
+      "grad_norm": 6.23228645324707,
+      "learning_rate": 4.558e-06,
+      "loss": 0.1342,
+      "step": 5450
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 0.047868575900793076,
+      "learning_rate": 4.508e-06,
+      "loss": 0.1799,
+      "step": 5500
+    },
+    {
+      "epoch": 5.55,
+      "grad_norm": 115.90292358398438,
+      "learning_rate": 4.458e-06,
+      "loss": 0.0751,
+      "step": 5550
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.1427452564239502,
+      "learning_rate": 4.408000000000001e-06,
+      "loss": 0.1132,
+      "step": 5600
+    },
+    {
+      "epoch": 5.65,
+      "grad_norm": 0.03756200522184372,
+      "learning_rate": 4.3580000000000005e-06,
+      "loss": 0.0219,
+      "step": 5650
+    },
+    {
+      "epoch": 5.7,
+      "grad_norm": 0.1948777437210083,
+      "learning_rate": 4.308000000000001e-06,
+      "loss": 0.1625,
+      "step": 5700
+    },
+    {
+      "epoch": 5.75,
+      "grad_norm": 0.14967912435531616,
+      "learning_rate": 4.2580000000000006e-06,
+      "loss": 0.1131,
+      "step": 5750
+    },
+    {
+      "epoch": 5.8,
+      "grad_norm": 2.684386730194092,
+      "learning_rate": 4.208e-06,
+      "loss": 0.1085,
+      "step": 5800
+    },
+    {
+      "epoch": 5.85,
+      "grad_norm": 0.037221621721982956,
+      "learning_rate": 4.158000000000001e-06,
+      "loss": 0.1957,
+      "step": 5850
+    },
+    {
+      "epoch": 5.9,
+      "grad_norm": 0.014206652529537678,
+      "learning_rate": 4.108e-06,
+      "loss": 0.0228,
+      "step": 5900
+    },
+    {
+      "epoch": 5.95,
+      "grad_norm": 0.008729885332286358,
+      "learning_rate": 4.058e-06,
+      "loss": 0.0421,
+      "step": 5950
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 253.62225341796875,
+      "learning_rate": 4.008e-06,
+      "loss": 0.0865,
+      "step": 6000
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.9,
+      "eval_f1": 0.9000352608410468,
+      "eval_loss": 0.7469241619110107,
+      "eval_precision": 0.9014304934175754,
+      "eval_recall": 0.9,
+      "eval_runtime": 112.6162,
+      "eval_samples_per_second": 8.88,
+      "eval_steps_per_second": 2.22,
+      "step": 6000
+    },
+    {
+      "epoch": 6.05,
+      "grad_norm": 0.011558984406292439,
+      "learning_rate": 3.958e-06,
+      "loss": 0.0105,
+      "step": 6050
+    },
+    {
+      "epoch": 6.1,
+      "grad_norm": 0.007709025871008635,
+      "learning_rate": 3.9080000000000005e-06,
+      "loss": 0.0543,
+      "step": 6100
+    },
+    {
+      "epoch": 6.15,
+      "grad_norm": 0.00899557676166296,
+      "learning_rate": 3.858e-06,
+      "loss": 0.0038,
+      "step": 6150
+    },
+    {
+      "epoch": 6.2,
+      "grad_norm": 0.1194106861948967,
+      "learning_rate": 3.8090000000000003e-06,
+      "loss": 0.0805,
+      "step": 6200
+    },
+    {
+      "epoch": 6.25,
+      "grad_norm": 0.0048521412536501884,
+      "learning_rate": 3.7590000000000003e-06,
+      "loss": 0.0221,
+      "step": 6250
+    },
+    {
+      "epoch": 6.3,
+      "grad_norm": 463.0719299316406,
+      "learning_rate": 3.7090000000000004e-06,
+      "loss": 0.1006,
+      "step": 6300
+    },
+    {
+      "epoch": 6.35,
+      "grad_norm": 0.010736881755292416,
+      "learning_rate": 3.6590000000000005e-06,
+      "loss": 0.1082,
+      "step": 6350
+    },
+    {
+      "epoch": 6.4,
+      "grad_norm": 0.008553804829716682,
+      "learning_rate": 3.609e-06,
+      "loss": 0.106,
+      "step": 6400
+    },
+    {
+      "epoch": 6.45,
+      "grad_norm": 192.78749084472656,
+      "learning_rate": 3.559e-06,
+      "loss": 0.1322,
+      "step": 6450
+    },
+    {
+      "epoch": 6.5,
+      "grad_norm": 0.361038476228714,
+      "learning_rate": 3.509e-06,
+      "loss": 0.1171,
+      "step": 6500
+    },
+    {
+      "epoch": 6.55,
+      "grad_norm": 0.012542732059955597,
+      "learning_rate": 3.4590000000000003e-06,
+      "loss": 0.0034,
+      "step": 6550
+    },
+    {
+      "epoch": 6.6,
+      "grad_norm": 0.004605053458362818,
+      "learning_rate": 3.409e-06,
+      "loss": 0.0422,
+      "step": 6600
+    },
+    {
+      "epoch": 6.65,
+      "grad_norm": 0.030367286875844002,
+      "learning_rate": 3.359e-06,
+      "loss": 0.2559,
+      "step": 6650
+    },
+    {
+      "epoch": 6.7,
+      "grad_norm": 0.027533065527677536,
+      "learning_rate": 3.3090000000000004e-06,
+      "loss": 0.0152,
+      "step": 6700
+    },
+    {
+      "epoch": 6.75,
+      "grad_norm": 0.024122724309563637,
+      "learning_rate": 3.2590000000000005e-06,
+      "loss": 0.0298,
+      "step": 6750
+    },
+    {
+      "epoch": 6.8,
+      "grad_norm": 0.01360723003745079,
+      "learning_rate": 3.2090000000000005e-06,
+      "loss": 0.0255,
+      "step": 6800
+    },
+    {
+      "epoch": 6.85,
+      "grad_norm": 0.010986587963998318,
+      "learning_rate": 3.1590000000000006e-06,
+      "loss": 0.1941,
+      "step": 6850
+    },
+    {
+      "epoch": 6.9,
+      "grad_norm": 0.021083349362015724,
+      "learning_rate": 3.1090000000000002e-06,
+      "loss": 0.1208,
+      "step": 6900
+    },
+    {
+      "epoch": 6.95,
+      "grad_norm": 0.004425186663866043,
+      "learning_rate": 3.0590000000000003e-06,
+      "loss": 0.0149,
+      "step": 6950
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.005357071291655302,
+      "learning_rate": 3.0090000000000003e-06,
+      "loss": 0.0314,
+      "step": 7000
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.892,
+      "eval_f1": 0.8921548011209359,
+      "eval_loss": 0.7714166045188904,
+      "eval_precision": 0.8931565142806782,
+      "eval_recall": 0.892,
+      "eval_runtime": 114.8764,
+      "eval_samples_per_second": 8.705,
+      "eval_steps_per_second": 2.176,
+      "step": 7000
+    },
+    {
+      "epoch": 7.05,
+      "grad_norm": 0.004052102565765381,
+      "learning_rate": 2.9590000000000004e-06,
+      "loss": 0.0531,
+      "step": 7050
+    },
+    {
+      "epoch": 7.1,
+      "grad_norm": 0.0535280667245388,
+      "learning_rate": 2.909e-06,
+      "loss": 0.0202,
+      "step": 7100
+    },
+    {
+      "epoch": 7.15,
+      "grad_norm": 0.026509596034884453,
+      "learning_rate": 2.859e-06,
+      "loss": 0.1402,
+      "step": 7150
+    },
+    {
+      "epoch": 7.2,
+      "grad_norm": 1.2209473848342896,
+      "learning_rate": 2.809e-06,
+      "loss": 0.0266,
+      "step": 7200
+    },
+    {
+      "epoch": 7.25,
+      "grad_norm": 26.857582092285156,
+      "learning_rate": 2.759e-06,
+      "loss": 0.0006,
+      "step": 7250
+    },
+    {
+      "epoch": 7.3,
+      "grad_norm": 0.0071187699213624,
+      "learning_rate": 2.709e-06,
+      "loss": 0.0008,
+      "step": 7300
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.010221453383564949,
+      "learning_rate": 2.6590000000000003e-06,
+      "loss": 0.0487,
+      "step": 7350
+    },
+    {
+      "epoch": 7.4,
+      "grad_norm": 0.003582503879442811,
+      "learning_rate": 2.6090000000000003e-06,
+      "loss": 0.1051,
+      "step": 7400
+    },
+    {
+      "epoch": 7.45,
+      "grad_norm": 0.005786183290183544,
+      "learning_rate": 2.5590000000000004e-06,
+      "loss": 0.0449,
+      "step": 7450
+    },
+    {
+      "epoch": 7.5,
+      "grad_norm": 0.010817117057740688,
+      "learning_rate": 2.5090000000000005e-06,
+      "loss": 0.0345,
+      "step": 7500
+    },
+    {
+      "epoch": 7.55,
+      "grad_norm": 0.453058660030365,
+      "learning_rate": 2.459e-06,
+      "loss": 0.0352,
+      "step": 7550
+    },
+    {
+      "epoch": 7.6,
+      "grad_norm": 0.003637389512732625,
+      "learning_rate": 2.409e-06,
+      "loss": 0.0392,
+      "step": 7600
+    },
+    {
+      "epoch": 7.65,
+      "grad_norm": 0.005378212314099073,
+      "learning_rate": 2.359e-06,
+      "loss": 0.0795,
+      "step": 7650
+    },
+    {
+      "epoch": 7.7,
+      "grad_norm": 0.005855490919202566,
+      "learning_rate": 2.3090000000000003e-06,
+      "loss": 0.0617,
+      "step": 7700
+    },
+    {
+      "epoch": 7.75,
+      "grad_norm": 0.004148279316723347,
+      "learning_rate": 2.259e-06,
+      "loss": 0.0016,
+      "step": 7750
+    },
+    {
+      "epoch": 7.8,
+      "grad_norm": 1.2424975633621216,
+      "learning_rate": 2.2090000000000004e-06,
+      "loss": 0.0545,
+      "step": 7800
+    },
+    {
+      "epoch": 7.85,
+      "grad_norm": 0.151920348405838,
+      "learning_rate": 2.1590000000000004e-06,
+      "loss": 0.0003,
+      "step": 7850
+    },
+    {
+      "epoch": 7.9,
+      "grad_norm": 0.0055457111448049545,
+      "learning_rate": 2.109e-06,
+      "loss": 0.0003,
+      "step": 7900
+    },
+    {
+      "epoch": 7.95,
+      "grad_norm": 0.006306794937700033,
+      "learning_rate": 2.059e-06,
+      "loss": 0.0003,
+      "step": 7950
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.006967851426452398,
+      "learning_rate": 2.009e-06,
+      "loss": 0.0806,
+      "step": 8000
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.891,
+      "eval_f1": 0.8907797096380854,
+      "eval_loss": 0.8573828935623169,
+      "eval_precision": 0.8919358819038602,
+      "eval_recall": 0.891,
+      "eval_runtime": 112.4936,
+      "eval_samples_per_second": 8.889,
+      "eval_steps_per_second": 2.222,
+      "step": 8000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 10000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 2,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 2
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8493277347840000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-8000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f403144661d43c8a35a0a64782c635300fe17c4e900c1c22a3d8255e6ad2018
+size 5304