Upload folder using huggingface_hub
- xlmr_hope_ml/checkpoint-8190/config.json +28 -0
- xlmr_hope_ml/checkpoint-8190/model.safetensors +3 -0
- xlmr_hope_ml/checkpoint-8190/optimizer.pt +3 -0
- xlmr_hope_ml/checkpoint-8190/rng_state.pth +3 -0
- xlmr_hope_ml/checkpoint-8190/scheduler.pt +3 -0
- xlmr_hope_ml/checkpoint-8190/trainer_state.json +1225 -0
- xlmr_hope_ml/checkpoint-8190/training_args.bin +3 -0
xlmr_hope_ml/checkpoint-8190/config.json
ADDED
@@ -0,0 +1,28 @@
+{
+  "architectures": [
+    "XLMRobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "transformers_version": "4.57.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}
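
For reference, a minimal sketch of loading this checkpoint for inference with transformers. The local checkpoint path and the base tokenizer (xlm-roberta-base) are assumptions: this upload contains no tokenizer files, and the config carries no id2label mapping, so outputs are raw class indices.

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "xlmr_hope_ml/checkpoint-8190"  # assumed local download of this folder
# Assumption: the tokenizer comes from the base model, since no tokenizer
# files are part of this upload.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = AutoModelForSequenceClassification.from_pretrained(ckpt)
model.eval()

inputs = tokenizer("Example sentence to classify", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.argmax(dim=-1).item())  # raw class index; no label names in config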
xlmr_hope_ml/checkpoint-8190/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:223d30157ac6007a18b19a5ccd57763d3ffec8aa6d87431aa421a62ed73c0406
+size 1112205008
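
This file (like optimizer.pt, rng_state.pth, scheduler.pt, and training_args.bin below) is stored as a Git LFS pointer rather than the binary payload; the oid field is the SHA-256 of the real file and size is its byte count. A small sketch for verifying a downloaded payload against its pointer; the local path is hypothetical:

import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file and return its hex SHA-256, the value in the LFS 'oid' field."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Compare against the oid from the pointer above.
digest = lfs_sha256("xlmr_hope_ml/checkpoint-8190/model.safetensors")
assert digest == "223d30157ac6007a18b19a5ccd57763d3ffec8aa6d87431aa421a62ed73c0406"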
xlmr_hope_ml/checkpoint-8190/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d47d0afc1eb876b23db47d6217b313c3fd8a4519e727fd2683d44394a27b65f
+size 2224532875
xlmr_hope_ml/checkpoint-8190/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0acf01d7b40d568a2dd5d875948460fb1a4f1c2199469f4b512628826c0a8b0f
+size 14645
xlmr_hope_ml/checkpoint-8190/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45338ca571085cd8a7411a7f31810ec518eedb9269c9c2db8984e82e9065bb53
+size 1465
xlmr_hope_ml/checkpoint-8190/trainer_state.json
ADDED
@@ -0,0 +1,1225 @@
+{
+  "best_global_step": 8190,
+  "best_metric": 0.847602022392802,
+  "best_model_checkpoint": "./malviz_models1/xlmr_hope_ml/checkpoint-8190",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 8190,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.030525030525030524,
+      "grad_norm": 6.407163619995117,
+      "learning_rate": 1.9880341880341883e-05,
+      "loss": 0.6391,
+      "step": 50
+    },
+    {
+      "epoch": 0.06105006105006105,
+      "grad_norm": 32.01091003417969,
+      "learning_rate": 1.9758241758241762e-05,
+      "loss": 0.6692,
+      "step": 100
+    },
+    {
+      "epoch": 0.09157509157509157,
+      "grad_norm": 15.060482025146484,
+      "learning_rate": 1.9636141636141637e-05,
+      "loss": 0.6613,
+      "step": 150
+    },
+    {
+      "epoch": 0.1221001221001221,
+      "grad_norm": 5.653907299041748,
+      "learning_rate": 1.9514041514041516e-05,
+      "loss": 0.5875,
+      "step": 200
+    },
+    {
+      "epoch": 0.15262515262515264,
+      "grad_norm": 19.34565544128418,
+      "learning_rate": 1.9391941391941395e-05,
+      "loss": 0.6016,
+      "step": 250
+    },
+    {
+      "epoch": 0.18315018315018314,
+      "grad_norm": 16.258790969848633,
+      "learning_rate": 1.9269841269841273e-05,
+      "loss": 0.5973,
+      "step": 300
+    },
+    {
+      "epoch": 0.21367521367521367,
+      "grad_norm": 15.239129066467285,
+      "learning_rate": 1.914774114774115e-05,
+      "loss": 0.5859,
+      "step": 350
+    },
+    {
+      "epoch": 0.2442002442002442,
+      "grad_norm": 37.504249572753906,
+      "learning_rate": 1.9025641025641027e-05,
+      "loss": 0.606,
+      "step": 400
+    },
+    {
+      "epoch": 0.27472527472527475,
+      "grad_norm": 28.16376304626465,
+      "learning_rate": 1.8903540903540906e-05,
+      "loss": 0.5809,
+      "step": 450
+    },
+    {
+      "epoch": 0.3052503052503053,
+      "grad_norm": 58.881919860839844,
+      "learning_rate": 1.878144078144078e-05,
+      "loss": 0.611,
+      "step": 500
+    },
+    {
+      "epoch": 0.33577533577533575,
+      "grad_norm": 8.836572647094727,
+      "learning_rate": 1.865934065934066e-05,
+      "loss": 0.6063,
+      "step": 550
+    },
+    {
+      "epoch": 0.3663003663003663,
+      "grad_norm": 4.018986701965332,
+      "learning_rate": 1.853724053724054e-05,
+      "loss": 0.6199,
+      "step": 600
+    },
+    {
+      "epoch": 0.3968253968253968,
+      "grad_norm": 5.098257064819336,
+      "learning_rate": 1.8415140415140414e-05,
+      "loss": 0.5779,
+      "step": 650
+    },
+    {
+      "epoch": 0.42735042735042733,
+      "grad_norm": 13.460134506225586,
+      "learning_rate": 1.8293040293040293e-05,
+      "loss": 0.546,
+      "step": 700
+    },
+    {
+      "epoch": 0.45787545787545786,
+      "grad_norm": 14.35595417022705,
+      "learning_rate": 1.817094017094017e-05,
+      "loss": 0.5757,
+      "step": 750
+    },
+    {
+      "epoch": 0.4884004884004884,
+      "grad_norm": 11.146136283874512,
+      "learning_rate": 1.804884004884005e-05,
+      "loss": 0.486,
+      "step": 800
+    },
+    {
+      "epoch": 0.518925518925519,
+      "grad_norm": 27.647869110107422,
+      "learning_rate": 1.792673992673993e-05,
+      "loss": 0.5923,
+      "step": 850
+    },
+    {
+      "epoch": 0.5494505494505495,
+      "grad_norm": 26.801227569580078,
+      "learning_rate": 1.7804639804639804e-05,
+      "loss": 0.5901,
+      "step": 900
+    },
+    {
+      "epoch": 0.57997557997558,
+      "grad_norm": 5.888443946838379,
+      "learning_rate": 1.7682539682539683e-05,
+      "loss": 0.5396,
+      "step": 950
+    },
+    {
+      "epoch": 0.6105006105006106,
+      "grad_norm": 6.846390247344971,
+      "learning_rate": 1.7560439560439562e-05,
+      "loss": 0.494,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6410256410256411,
+      "grad_norm": 5.446658611297607,
+      "learning_rate": 1.743833943833944e-05,
+      "loss": 0.5021,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6715506715506715,
+      "grad_norm": 9.575170516967773,
+      "learning_rate": 1.731623931623932e-05,
+      "loss": 0.5124,
+      "step": 1100
+    },
+    {
+      "epoch": 0.702075702075702,
+      "grad_norm": 11.39124870300293,
+      "learning_rate": 1.7194139194139198e-05,
+      "loss": 0.5161,
+      "step": 1150
+    },
+    {
+      "epoch": 0.7326007326007326,
+      "grad_norm": 8.475871086120605,
+      "learning_rate": 1.7072039072039073e-05,
+      "loss": 0.487,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7631257631257631,
+      "grad_norm": 5.726651191711426,
+      "learning_rate": 1.6949938949938952e-05,
+      "loss": 0.4989,
+      "step": 1250
+    },
+    {
+      "epoch": 0.7936507936507936,
+      "grad_norm": 6.620838165283203,
+      "learning_rate": 1.682783882783883e-05,
+      "loss": 0.459,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8241758241758241,
+      "grad_norm": 14.812911987304688,
+      "learning_rate": 1.670573870573871e-05,
+      "loss": 0.5003,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8547008547008547,
+      "grad_norm": 7.2252068519592285,
+      "learning_rate": 1.6583638583638585e-05,
+      "loss": 0.4551,
+      "step": 1400
+    },
+    {
+      "epoch": 0.8852258852258852,
+      "grad_norm": 3.8601975440979004,
+      "learning_rate": 1.6461538461538463e-05,
+      "loss": 0.4581,
+      "step": 1450
+    },
+    {
+      "epoch": 0.9157509157509157,
+      "grad_norm": 13.167617797851562,
+      "learning_rate": 1.6339438339438342e-05,
+      "loss": 0.4876,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9462759462759462,
+      "grad_norm": 7.253393650054932,
+      "learning_rate": 1.6217338217338217e-05,
+      "loss": 0.4525,
+      "step": 1550
+    },
+    {
+      "epoch": 0.9768009768009768,
+      "grad_norm": 8.873754501342773,
+      "learning_rate": 1.6095238095238096e-05,
+      "loss": 0.4631,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7461459403905447,
+      "eval_f1": 0.766364618875453,
+      "eval_loss": 0.46973007917404175,
+      "eval_runtime": 3.4241,
+      "eval_samples_per_second": 284.161,
+      "eval_steps_per_second": 17.815,
+      "step": 1638
+    },
+    {
+      "epoch": 1.0073260073260073,
+      "grad_norm": 14.408209800720215,
+      "learning_rate": 1.5973137973137975e-05,
+      "loss": 0.4603,
+      "step": 1650
+    },
+    {
+      "epoch": 1.037851037851038,
+      "grad_norm": 22.586956024169922,
+      "learning_rate": 1.585103785103785e-05,
+      "loss": 0.4737,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0683760683760684,
+      "grad_norm": 9.874870300292969,
+      "learning_rate": 1.572893772893773e-05,
+      "loss": 0.452,
+      "step": 1750
+    },
+    {
+      "epoch": 1.098901098901099,
+      "grad_norm": 24.893917083740234,
+      "learning_rate": 1.5606837606837608e-05,
+      "loss": 0.4263,
+      "step": 1800
+    },
+    {
+      "epoch": 1.1294261294261294,
+      "grad_norm": 18.05025291442871,
+      "learning_rate": 1.5484737484737486e-05,
+      "loss": 0.4424,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1599511599511598,
+      "grad_norm": 3.7677385807037354,
+      "learning_rate": 1.5362637362637365e-05,
+      "loss": 0.4099,
+      "step": 1900
+    },
+    {
+      "epoch": 1.1904761904761905,
+      "grad_norm": 9.057735443115234,
+      "learning_rate": 1.524053724053724e-05,
+      "loss": 0.4298,
+      "step": 1950
+    },
+    {
+      "epoch": 1.221001221001221,
+      "grad_norm": 9.04925537109375,
+      "learning_rate": 1.5118437118437119e-05,
+      "loss": 0.4629,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2515262515262515,
+      "grad_norm": 15.214876174926758,
+      "learning_rate": 1.4996336996336998e-05,
+      "loss": 0.3746,
+      "step": 2050
+    },
+    {
+      "epoch": 1.282051282051282,
+      "grad_norm": 14.752812385559082,
+      "learning_rate": 1.4874236874236877e-05,
+      "loss": 0.454,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3125763125763126,
+      "grad_norm": 13.337708473205566,
+      "learning_rate": 1.4752136752136754e-05,
+      "loss": 0.4289,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3431013431013432,
+      "grad_norm": 15.018891334533691,
+      "learning_rate": 1.463003663003663e-05,
+      "loss": 0.4298,
+      "step": 2200
+    },
+    {
+      "epoch": 1.3736263736263736,
+      "grad_norm": 40.46508026123047,
+      "learning_rate": 1.450793650793651e-05,
+      "loss": 0.3758,
+      "step": 2250
+    },
+    {
+      "epoch": 1.404151404151404,
+      "grad_norm": 43.62735366821289,
+      "learning_rate": 1.4385836385836386e-05,
+      "loss": 0.4464,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4346764346764347,
+      "grad_norm": 12.847195625305176,
+      "learning_rate": 1.4263736263736265e-05,
+      "loss": 0.4321,
+      "step": 2350
+    },
+    {
+      "epoch": 1.4652014652014653,
+      "grad_norm": 17.7137508392334,
+      "learning_rate": 1.4141636141636144e-05,
+      "loss": 0.4588,
+      "step": 2400
+    },
+    {
+      "epoch": 1.4957264957264957,
+      "grad_norm": 18.785490036010742,
+      "learning_rate": 1.4019536019536019e-05,
+      "loss": 0.3921,
+      "step": 2450
+    },
+    {
+      "epoch": 1.5262515262515262,
+      "grad_norm": 7.5587239265441895,
+      "learning_rate": 1.3897435897435898e-05,
+      "loss": 0.4384,
+      "step": 2500
+    },
+    {
+      "epoch": 1.5567765567765568,
+      "grad_norm": 38.49454116821289,
+      "learning_rate": 1.3775335775335776e-05,
+      "loss": 0.4314,
+      "step": 2550
+    },
+    {
+      "epoch": 1.5873015873015874,
+      "grad_norm": 10.616717338562012,
+      "learning_rate": 1.3653235653235655e-05,
+      "loss": 0.4251,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6178266178266179,
+      "grad_norm": 13.551919937133789,
+      "learning_rate": 1.3531135531135532e-05,
+      "loss": 0.4647,
+      "step": 2650
+    },
+    {
+      "epoch": 1.6483516483516483,
+      "grad_norm": 22.2562313079834,
+      "learning_rate": 1.340903540903541e-05,
+      "loss": 0.4042,
+      "step": 2700
+    },
+    {
+      "epoch": 1.678876678876679,
+      "grad_norm": 113.62215423583984,
+      "learning_rate": 1.3286935286935288e-05,
+      "loss": 0.4039,
+      "step": 2750
+    },
+    {
+      "epoch": 1.7094017094017095,
+      "grad_norm": 17.464237213134766,
+      "learning_rate": 1.3164835164835165e-05,
+      "loss": 0.4271,
+      "step": 2800
+    },
+    {
+      "epoch": 1.73992673992674,
+      "grad_norm": 25.872488021850586,
+      "learning_rate": 1.3042735042735044e-05,
+      "loss": 0.4423,
+      "step": 2850
+    },
+    {
+      "epoch": 1.7704517704517704,
+      "grad_norm": 16.714445114135742,
+      "learning_rate": 1.2920634920634922e-05,
+      "loss": 0.3654,
+      "step": 2900
+    },
+    {
+      "epoch": 1.800976800976801,
+      "grad_norm": 27.8351993560791,
+      "learning_rate": 1.2798534798534798e-05,
+      "loss": 0.4181,
+      "step": 2950
+    },
+    {
+      "epoch": 1.8315018315018317,
+      "grad_norm": 4.469764709472656,
+      "learning_rate": 1.2676434676434676e-05,
+      "loss": 0.3711,
+      "step": 3000
+    },
+    {
+      "epoch": 1.862026862026862,
+      "grad_norm": 65.09400939941406,
+      "learning_rate": 1.2554334554334555e-05,
+      "loss": 0.4071,
+      "step": 3050
+    },
+    {
+      "epoch": 1.8925518925518925,
+      "grad_norm": 17.013628005981445,
+      "learning_rate": 1.2432234432234434e-05,
+      "loss": 0.376,
+      "step": 3100
+    },
+    {
+      "epoch": 1.9230769230769231,
+      "grad_norm": 15.664719581604004,
+      "learning_rate": 1.2310134310134313e-05,
+      "loss": 0.4217,
+      "step": 3150
+    },
+    {
+      "epoch": 1.9536019536019538,
+      "grad_norm": 17.986469268798828,
+      "learning_rate": 1.218803418803419e-05,
+      "loss": 0.3626,
+      "step": 3200
+    },
+    {
+      "epoch": 1.9841269841269842,
+      "grad_norm": 14.723675727844238,
+      "learning_rate": 1.2065934065934067e-05,
+      "loss": 0.4065,
+      "step": 3250
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8335046248715313,
+      "eval_f1": 0.8335046248715313,
+      "eval_loss": 0.39165642857551575,
+      "eval_runtime": 3.4232,
+      "eval_samples_per_second": 284.237,
+      "eval_steps_per_second": 17.82,
+      "step": 3276
+    },
+    {
+      "epoch": 2.0146520146520146,
+      "grad_norm": 11.310654640197754,
+      "learning_rate": 1.1943833943833945e-05,
+      "loss": 0.4243,
+      "step": 3300
+    },
+    {
+      "epoch": 2.045177045177045,
+      "grad_norm": 26.5709228515625,
+      "learning_rate": 1.1821733821733822e-05,
+      "loss": 0.4102,
+      "step": 3350
+    },
+    {
+      "epoch": 2.075702075702076,
+      "grad_norm": 9.054444313049316,
+      "learning_rate": 1.1699633699633701e-05,
+      "loss": 0.3512,
+      "step": 3400
+    },
+    {
+      "epoch": 2.1062271062271063,
+      "grad_norm": 29.050796508789062,
+      "learning_rate": 1.157753357753358e-05,
+      "loss": 0.3907,
+      "step": 3450
+    },
+    {
+      "epoch": 2.1367521367521367,
+      "grad_norm": 46.302555084228516,
+      "learning_rate": 1.1455433455433455e-05,
+      "loss": 0.3831,
+      "step": 3500
+    },
+    {
+      "epoch": 2.167277167277167,
+      "grad_norm": 3.2793078422546387,
+      "learning_rate": 1.1333333333333334e-05,
+      "loss": 0.3318,
+      "step": 3550
+    },
+    {
+      "epoch": 2.197802197802198,
+      "grad_norm": 3.5271809101104736,
+      "learning_rate": 1.1211233211233213e-05,
+      "loss": 0.4014,
+      "step": 3600
+    },
+    {
+      "epoch": 2.2283272283272284,
+      "grad_norm": 28.36669921875,
+      "learning_rate": 1.1089133089133091e-05,
+      "loss": 0.298,
+      "step": 3650
+    },
+    {
+      "epoch": 2.258852258852259,
+      "grad_norm": 16.253847122192383,
+      "learning_rate": 1.0967032967032968e-05,
+      "loss": 0.3707,
+      "step": 3700
+    },
+    {
+      "epoch": 2.2893772893772892,
+      "grad_norm": 33.73545455932617,
+      "learning_rate": 1.0844932844932845e-05,
+      "loss": 0.407,
+      "step": 3750
+    },
+    {
+      "epoch": 2.3199023199023197,
+      "grad_norm": 7.105137825012207,
+      "learning_rate": 1.0722832722832724e-05,
+      "loss": 0.3712,
+      "step": 3800
+    },
+    {
+      "epoch": 2.3504273504273505,
+      "grad_norm": 16.38484764099121,
+      "learning_rate": 1.0600732600732601e-05,
+      "loss": 0.358,
+      "step": 3850
+    },
+    {
+      "epoch": 2.380952380952381,
+      "grad_norm": 12.821770668029785,
+      "learning_rate": 1.047863247863248e-05,
+      "loss": 0.3807,
+      "step": 3900
+    },
+    {
+      "epoch": 2.4114774114774113,
+      "grad_norm": 13.208888053894043,
+      "learning_rate": 1.0356532356532358e-05,
+      "loss": 0.3175,
+      "step": 3950
+    },
+    {
+      "epoch": 2.442002442002442,
+      "grad_norm": 3.4101309776306152,
+      "learning_rate": 1.0234432234432234e-05,
+      "loss": 0.3779,
+      "step": 4000
+    },
+    {
+      "epoch": 2.4725274725274726,
+      "grad_norm": 12.744550704956055,
+      "learning_rate": 1.0112332112332112e-05,
+      "loss": 0.3315,
+      "step": 4050
+    },
+    {
+      "epoch": 2.503052503052503,
+      "grad_norm": 30.4759464263916,
+      "learning_rate": 9.990231990231991e-06,
+      "loss": 0.2839,
+      "step": 4100
+    },
+    {
+      "epoch": 2.5335775335775335,
+      "grad_norm": 30.9493408203125,
+      "learning_rate": 9.86813186813187e-06,
+      "loss": 0.4101,
+      "step": 4150
+    },
+    {
+      "epoch": 2.564102564102564,
+      "grad_norm": 32.25123596191406,
+      "learning_rate": 9.746031746031747e-06,
+      "loss": 0.3538,
+      "step": 4200
+    },
+    {
+      "epoch": 2.5946275946275947,
+      "grad_norm": 23.044126510620117,
+      "learning_rate": 9.623931623931626e-06,
+      "loss": 0.3916,
+      "step": 4250
+    },
+    {
+      "epoch": 2.625152625152625,
+      "grad_norm": 12.858752250671387,
+      "learning_rate": 9.501831501831503e-06,
+      "loss": 0.3543,
+      "step": 4300
+    },
+    {
+      "epoch": 2.6556776556776556,
+      "grad_norm": 13.903925895690918,
+      "learning_rate": 9.37973137973138e-06,
+      "loss": 0.3419,
+      "step": 4350
+    },
+    {
+      "epoch": 2.6862026862026864,
+      "grad_norm": 19.525449752807617,
+      "learning_rate": 9.257631257631258e-06,
+      "loss": 0.3833,
+      "step": 4400
+    },
+    {
+      "epoch": 2.716727716727717,
+      "grad_norm": 77.14259338378906,
+      "learning_rate": 9.135531135531135e-06,
+      "loss": 0.3502,
+      "step": 4450
+    },
+    {
+      "epoch": 2.7472527472527473,
+      "grad_norm": 53.06977462768555,
+      "learning_rate": 9.013431013431014e-06,
+      "loss": 0.3557,
+      "step": 4500
+    },
+    {
+      "epoch": 2.7777777777777777,
+      "grad_norm": 1.970027208328247,
+      "learning_rate": 8.891330891330891e-06,
+      "loss": 0.3539,
+      "step": 4550
+    },
+    {
+      "epoch": 2.808302808302808,
+      "grad_norm": 11.040838241577148,
+      "learning_rate": 8.76923076923077e-06,
+      "loss": 0.4407,
+      "step": 4600
+    },
+    {
+      "epoch": 2.838827838827839,
+      "grad_norm": 3.146773099899292,
+      "learning_rate": 8.647130647130649e-06,
+      "loss": 0.3162,
+      "step": 4650
+    },
+    {
+      "epoch": 2.8693528693528694,
+      "grad_norm": 8.109123229980469,
+      "learning_rate": 8.525030525030526e-06,
+      "loss": 0.3643,
+      "step": 4700
+    },
+    {
+      "epoch": 2.8998778998779,
+      "grad_norm": 45.884132385253906,
+      "learning_rate": 8.402930402930404e-06,
+      "loss": 0.3473,
+      "step": 4750
+    },
+    {
+      "epoch": 2.9304029304029307,
+      "grad_norm": 45.499881744384766,
+      "learning_rate": 8.280830280830281e-06,
+      "loss": 0.3432,
+      "step": 4800
+    },
+    {
+      "epoch": 2.960927960927961,
+      "grad_norm": 8.839730262756348,
+      "learning_rate": 8.15873015873016e-06,
+      "loss": 0.4356,
+      "step": 4850
+    },
+    {
+      "epoch": 2.9914529914529915,
+      "grad_norm": 16.1923770904541,
+      "learning_rate": 8.036630036630037e-06,
+      "loss": 0.3048,
+      "step": 4900
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.841726618705036,
+      "eval_f1": 0.842039749114194,
+      "eval_loss": 0.4310351014137268,
+      "eval_runtime": 3.4121,
+      "eval_samples_per_second": 285.165,
+      "eval_steps_per_second": 17.878,
+      "step": 4914
+    },
+    {
+      "epoch": 3.021978021978022,
+      "grad_norm": 0.17316707968711853,
+      "learning_rate": 7.914529914529914e-06,
+      "loss": 0.3123,
+      "step": 4950
+    },
+    {
+      "epoch": 3.0525030525030523,
+      "grad_norm": 34.99089050292969,
+      "learning_rate": 7.792429792429793e-06,
+      "loss": 0.2688,
+      "step": 5000
+    },
+    {
+      "epoch": 3.083028083028083,
+      "grad_norm": 33.27500534057617,
+      "learning_rate": 7.670329670329671e-06,
+      "loss": 0.3351,
+      "step": 5050
+    },
+    {
+      "epoch": 3.1135531135531136,
+      "grad_norm": 1.5347567796707153,
+      "learning_rate": 7.5482295482295485e-06,
+      "loss": 0.2936,
+      "step": 5100
+    },
+    {
+      "epoch": 3.144078144078144,
+      "grad_norm": 41.4924430847168,
+      "learning_rate": 7.426129426129427e-06,
+      "loss": 0.3149,
+      "step": 5150
+    },
+    {
+      "epoch": 3.1746031746031744,
+      "grad_norm": 11.000214576721191,
+      "learning_rate": 7.304029304029304e-06,
+      "loss": 0.3578,
+      "step": 5200
+    },
+    {
+      "epoch": 3.2051282051282053,
+      "grad_norm": 44.290706634521484,
+      "learning_rate": 7.181929181929183e-06,
+      "loss": 0.3072,
+      "step": 5250
+    },
+    {
+      "epoch": 3.2356532356532357,
+      "grad_norm": 15.626100540161133,
+      "learning_rate": 7.059829059829061e-06,
+      "loss": 0.3477,
+      "step": 5300
+    },
+    {
+      "epoch": 3.266178266178266,
+      "grad_norm": 19.285181045532227,
+      "learning_rate": 6.937728937728938e-06,
+      "loss": 0.3064,
+      "step": 5350
+    },
+    {
+      "epoch": 3.2967032967032965,
+      "grad_norm": 44.707035064697266,
+      "learning_rate": 6.8156288156288165e-06,
+      "loss": 0.3293,
+      "step": 5400
+    },
+    {
+      "epoch": 3.3272283272283274,
+      "grad_norm": 0.5621269345283508,
+      "learning_rate": 6.6935286935286936e-06,
+      "loss": 0.2655,
+      "step": 5450
+    },
+    {
+      "epoch": 3.357753357753358,
+      "grad_norm": 44.706912994384766,
+      "learning_rate": 6.571428571428572e-06,
+      "loss": 0.3088,
+      "step": 5500
+    },
+    {
+      "epoch": 3.3882783882783882,
+      "grad_norm": 39.22804641723633,
+      "learning_rate": 6.44932844932845e-06,
+      "loss": 0.3369,
+      "step": 5550
+    },
+    {
+      "epoch": 3.4188034188034186,
+      "grad_norm": 22.248640060424805,
+      "learning_rate": 6.327228327228327e-06,
+      "loss": 0.2965,
+      "step": 5600
+    },
+    {
+      "epoch": 3.4493284493284495,
+      "grad_norm": 20.197702407836914,
+      "learning_rate": 6.205128205128206e-06,
+      "loss": 0.3755,
+      "step": 5650
+    },
+    {
+      "epoch": 3.47985347985348,
+      "grad_norm": 48.2933349609375,
+      "learning_rate": 6.083028083028083e-06,
+      "loss": 0.3001,
+      "step": 5700
+    },
+    {
+      "epoch": 3.5103785103785103,
+      "grad_norm": 44.70425796508789,
+      "learning_rate": 5.960927960927962e-06,
+      "loss": 0.3492,
+      "step": 5750
+    },
+    {
+      "epoch": 3.5409035409035408,
+      "grad_norm": 52.380699157714844,
+      "learning_rate": 5.8388278388278395e-06,
+      "loss": 0.3542,
+      "step": 5800
+    },
+    {
+      "epoch": 3.571428571428571,
+      "grad_norm": 61.83149337768555,
+      "learning_rate": 5.7167277167277165e-06,
+      "loss": 0.4119,
+      "step": 5850
+    },
+    {
+      "epoch": 3.601953601953602,
+      "grad_norm": 57.2952766418457,
+      "learning_rate": 5.594627594627595e-06,
+      "loss": 0.2609,
+      "step": 5900
+    },
+    {
+      "epoch": 3.6324786324786325,
+      "grad_norm": 9.41811466217041,
+      "learning_rate": 5.472527472527474e-06,
+      "loss": 0.2455,
+      "step": 5950
+    },
+    {
+      "epoch": 3.663003663003663,
+      "grad_norm": 21.504688262939453,
+      "learning_rate": 5.350427350427351e-06,
+      "loss": 0.2937,
+      "step": 6000
+    },
+    {
+      "epoch": 3.6935286935286937,
+      "grad_norm": 21.507137298583984,
+      "learning_rate": 5.228327228327229e-06,
+      "loss": 0.3562,
+      "step": 6050
+    },
+    {
+      "epoch": 3.724053724053724,
+      "grad_norm": 27.680004119873047,
+      "learning_rate": 5.106227106227107e-06,
+      "loss": 0.3818,
+      "step": 6100
+    },
+    {
+      "epoch": 3.7545787545787546,
+      "grad_norm": 3.6276087760925293,
+      "learning_rate": 4.9841269841269845e-06,
+      "loss": 0.3531,
+      "step": 6150
+    },
+    {
+      "epoch": 3.785103785103785,
+      "grad_norm": 7.672877311706543,
+      "learning_rate": 4.862026862026862e-06,
+      "loss": 0.3249,
+      "step": 6200
+    },
+    {
+      "epoch": 3.8156288156288154,
+      "grad_norm": 4.853017330169678,
+      "learning_rate": 4.73992673992674e-06,
+      "loss": 0.3108,
+      "step": 6250
+    },
+    {
+      "epoch": 3.8461538461538463,
+      "grad_norm": 5.495295524597168,
+      "learning_rate": 4.617826617826618e-06,
+      "loss": 0.2307,
+      "step": 6300
+    },
+    {
+      "epoch": 3.8766788766788767,
+      "grad_norm": 8.944286346435547,
+      "learning_rate": 4.495726495726496e-06,
+      "loss": 0.3389,
+      "step": 6350
+    },
+    {
+      "epoch": 3.907203907203907,
+      "grad_norm": 50.722347259521484,
+      "learning_rate": 4.373626373626374e-06,
+      "loss": 0.3585,
+      "step": 6400
+    },
+    {
+      "epoch": 3.937728937728938,
+      "grad_norm": 1.0614252090454102,
+      "learning_rate": 4.251526251526252e-06,
+      "loss": 0.2608,
+      "step": 6450
+    },
+    {
+      "epoch": 3.9682539682539684,
+      "grad_norm": 26.249635696411133,
+      "learning_rate": 4.12942612942613e-06,
+      "loss": 0.2843,
+      "step": 6500
+    },
+    {
+      "epoch": 3.998778998778999,
+      "grad_norm": 46.925880432128906,
+      "learning_rate": 4.0073260073260075e-06,
+      "loss": 0.3267,
+      "step": 6550
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.8252826310380267,
+      "eval_f1": 0.8320719787613731,
+      "eval_loss": 0.61274653673172,
+      "eval_runtime": 3.415,
+      "eval_samples_per_second": 284.919,
+      "eval_steps_per_second": 17.862,
+      "step": 6552
+    },
+    {
+      "epoch": 4.029304029304029,
+      "grad_norm": 6.32822847366333,
+      "learning_rate": 3.885225885225885e-06,
+      "loss": 0.2132,
+      "step": 6600
+    },
+    {
+      "epoch": 4.05982905982906,
+      "grad_norm": 36.05378341674805,
+      "learning_rate": 3.763125763125763e-06,
+      "loss": 0.2523,
+      "step": 6650
+    },
+    {
+      "epoch": 4.09035409035409,
+      "grad_norm": 0.1818905770778656,
+      "learning_rate": 3.641025641025641e-06,
+      "loss": 0.2759,
+      "step": 6700
+    },
+    {
+      "epoch": 4.1208791208791204,
+      "grad_norm": 1.4091908931732178,
+      "learning_rate": 3.5189255189255194e-06,
+      "loss": 0.2485,
+      "step": 6750
+    },
+    {
+      "epoch": 4.151404151404152,
+      "grad_norm": 124.52008056640625,
+      "learning_rate": 3.3968253968253972e-06,
+      "loss": 0.2202,
+      "step": 6800
+    },
+    {
+      "epoch": 4.181929181929182,
+      "grad_norm": 28.67030143737793,
+      "learning_rate": 3.274725274725275e-06,
+      "loss": 0.328,
+      "step": 6850
+    },
+    {
+      "epoch": 4.212454212454213,
+      "grad_norm": 53.90391159057617,
+      "learning_rate": 3.152625152625153e-06,
+      "loss": 0.3406,
+      "step": 6900
+    },
+    {
+      "epoch": 4.242979242979243,
+      "grad_norm": 0.3543091118335724,
+      "learning_rate": 3.0305250305250304e-06,
+      "loss": 0.2255,
+      "step": 6950
+    },
+    {
+      "epoch": 4.273504273504273,
+      "grad_norm": 5.9171319007873535,
+      "learning_rate": 2.9084249084249087e-06,
+      "loss": 0.1897,
+      "step": 7000
+    },
+    {
+      "epoch": 4.304029304029304,
+      "grad_norm": 121.3272705078125,
+      "learning_rate": 2.7863247863247866e-06,
+      "loss": 0.2379,
+      "step": 7050
+    },
+    {
+      "epoch": 4.334554334554334,
+      "grad_norm": 29.23149299621582,
+      "learning_rate": 2.6642246642246644e-06,
+      "loss": 0.2746,
+      "step": 7100
+    },
+    {
+      "epoch": 4.365079365079365,
+      "grad_norm": 0.29333066940307617,
+      "learning_rate": 2.5421245421245423e-06,
+      "loss": 0.235,
+      "step": 7150
+    },
+    {
+      "epoch": 4.395604395604396,
+      "grad_norm": 0.5657308101654053,
+      "learning_rate": 2.42002442002442e-06,
+      "loss": 0.2173,
+      "step": 7200
+    },
+    {
+      "epoch": 4.426129426129426,
+      "grad_norm": 65.32083129882812,
+      "learning_rate": 2.297924297924298e-06,
+      "loss": 0.2274,
+      "step": 7250
+    },
+    {
+      "epoch": 4.456654456654457,
+      "grad_norm": 46.64541244506836,
+      "learning_rate": 2.175824175824176e-06,
+      "loss": 0.2169,
+      "step": 7300
+    },
+    {
+      "epoch": 4.487179487179487,
+      "grad_norm": 33.644920349121094,
+      "learning_rate": 2.0537240537240538e-06,
+      "loss": 0.3825,
+      "step": 7350
+    },
+    {
+      "epoch": 4.517704517704518,
+      "grad_norm": 0.12038320302963257,
+      "learning_rate": 1.931623931623932e-06,
+      "loss": 0.2777,
+      "step": 7400
+    },
+    {
+      "epoch": 4.548229548229548,
+      "grad_norm": 0.2744602560997009,
+      "learning_rate": 1.8095238095238097e-06,
+      "loss": 0.2477,
+      "step": 7450
+    },
+    {
+      "epoch": 4.5787545787545785,
+      "grad_norm": 6.675881385803223,
+      "learning_rate": 1.6874236874236878e-06,
+      "loss": 0.2935,
+      "step": 7500
+    },
+    {
+      "epoch": 4.609279609279609,
+      "grad_norm": 0.19729621708393097,
+      "learning_rate": 1.5653235653235654e-06,
+      "loss": 0.2718,
+      "step": 7550
+    },
+    {
+      "epoch": 4.639804639804639,
+      "grad_norm": 10.316431045532227,
+      "learning_rate": 1.4432234432234433e-06,
+      "loss": 0.2216,
+      "step": 7600
+    },
+    {
+      "epoch": 4.670329670329671,
+      "grad_norm": 0.23969869315624237,
+      "learning_rate": 1.3211233211233212e-06,
+      "loss": 0.3216,
+      "step": 7650
+    },
+    {
+      "epoch": 4.700854700854701,
+      "grad_norm": 19.417451858520508,
+      "learning_rate": 1.1990231990231992e-06,
+      "loss": 0.308,
+      "step": 7700
+    },
+    {
+      "epoch": 4.7313797313797314,
+      "grad_norm": 3.6119327545166016,
+      "learning_rate": 1.076923076923077e-06,
+      "loss": 0.2515,
+      "step": 7750
+    },
+    {
+      "epoch": 4.761904761904762,
+      "grad_norm": 0.5512221455574036,
+      "learning_rate": 9.548229548229548e-07,
+      "loss": 0.2688,
+      "step": 7800
+    },
+    {
+      "epoch": 4.792429792429792,
+      "grad_norm": 1.3888872861862183,
+      "learning_rate": 8.327228327228327e-07,
+      "loss": 0.2449,
+      "step": 7850
+    },
+    {
+      "epoch": 4.822954822954823,
+      "grad_norm": 34.30708312988281,
+      "learning_rate": 7.106227106227107e-07,
+      "loss": 0.2926,
+      "step": 7900
+    },
+    {
+      "epoch": 4.853479853479853,
+      "grad_norm": 7.761233806610107,
+      "learning_rate": 5.885225885225886e-07,
+      "loss": 0.3384,
+      "step": 7950
+    },
+    {
+      "epoch": 4.884004884004884,
+      "grad_norm": 67.32460021972656,
+      "learning_rate": 4.664224664224665e-07,
+      "loss": 0.2586,
+      "step": 8000
+    },
+    {
+      "epoch": 4.914529914529915,
+      "grad_norm": 0.2525177299976349,
+      "learning_rate": 3.4432234432234435e-07,
+      "loss": 0.3106,
+      "step": 8050
+    },
+    {
+      "epoch": 4.945054945054945,
+      "grad_norm": 31.685503005981445,
+      "learning_rate": 2.2222222222222224e-07,
+      "loss": 0.277,
+      "step": 8100
+    },
+    {
+      "epoch": 4.975579975579976,
+      "grad_norm": 13.309945106506348,
+      "learning_rate": 1.0012210012210014e-07,
+      "loss": 0.286,
+      "step": 8150
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.8458376156217883,
+      "eval_f1": 0.847602022392802,
+      "eval_loss": 0.6701350808143616,
+      "eval_runtime": 3.429,
+      "eval_samples_per_second": 283.76,
+      "eval_steps_per_second": 17.79,
+      "step": 8190
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 8190,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4309101309158400.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
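
The log_history array interleaves training records (loss, grad_norm, learning_rate every 50 steps) with one evaluation record per epoch. A sketch for pulling the per-epoch eval metrics back out of the file, assuming it sits at the checkpoint path shown in the diff header:

import json

with open("xlmr_hope_ml/checkpoint-8190/trainer_state.json") as f:
    state = json.load(f)

# Evaluation records are the entries that carry an "eval_f1" key.
for rec in state["log_history"]:
    if "eval_f1" in rec:
        print(f'epoch {rec["epoch"]:.0f}: f1={rec["eval_f1"]:.4f} eval_loss={rec["eval_loss"]:.4f}')

# best_metric / best_global_step record the checkpoint selected as best.
print("best:", state["best_metric"], "at step", state["best_global_step"])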
xlmr_hope_ml/checkpoint-8190/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c82fed9c3ff70f7c941da76fd2069a4d27a53f3fc0a11157cf37f02fe0dfd54e
+size 5841
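
training_args.bin is a pickled TrainingArguments object rather than a tensor file. If you trust the source, it can be inspected roughly as below; weights_only=False is required on recent PyTorch versions, and because unpickling can execute arbitrary code this is only safe for trusted checkpoints.

import torch

# Assumed local path to this folder; loads the pickled TrainingArguments.
args = torch.load("xlmr_hope_ml/checkpoint-8190/training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)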