Upload 16 files

Browse files

Files changed (16) hide show

checkpoint-2250/config.json +69 -0
checkpoint-2250/generation_config.json +16 -0
checkpoint-2250/model.safetensors +3 -0
checkpoint-2250/optimizer.pt +3 -0
checkpoint-2250/rng_state.pth +3 -0
checkpoint-2250/scheduler.pt +3 -0
checkpoint-2250/trainer_state.json +818 -0
checkpoint-2250/training_args.bin +3 -0
config.json +69 -0
generation_config.json +16 -0
merges.txt +0 -0
model.safetensors +3 -0
special_tokens_map.json +15 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
vocab.json +0 -0

checkpoint-2250/config.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "early_stopping": null,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "force_bos_token_to_be_generated": true,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_decoder": true,
+  "is_encoder_decoder": false,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "length_penalty": null,
+  "max_length": null,
+  "max_position_embeddings": 1024,
+  "min_length": null,
+  "model_type": "bart",
+  "no_repeat_ngram_size": null,
+  "normalize_before": false,
+  "num_beams": null,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "prefix": " ",
+  "scale_embedding": false,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 142,
+      "min_length": 56,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
+  "use_cache": true,
+  "vocab_size": 50264
+}

checkpoint-2250/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "early_stopping": true,
+  "eos_token_id": 2,
+  "forced_bos_token_id": 0,
+  "forced_eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 142,
+  "min_length": 56,
+  "no_repeat_ngram_size": 3,
+  "num_beams": 4,
+  "pad_token_id": 1,
+  "transformers_version": "4.52.4"
+}

checkpoint-2250/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74d34ed7efb5a94ca559d7d28c72f5c61aff49639cb20fdf3774e3fae7b355d7
+size 1016369648

checkpoint-2250/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d2b1ec36e2d17c6d9064563b8ff3ad38fd0085030205b4c3023e0e423d396a1
+size 1629580410

checkpoint-2250/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57d6afeb259c99a77dd9085eca0ede9f860c316c3a8e068727600fe1f506487f
+size 13990

checkpoint-2250/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bec953c174e41850d61f3908d88b9683a2b8a87c5e9752e8ae44ce839aa2004
+size 1064

checkpoint-2250/trainer_state.json ADDED Viewed

	@@ -0,0 +1,818 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 2250,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008888888888888889,
+      "grad_norm": 29.719377517700195,
+      "learning_rate": 4.957777777777778e-05,
+      "loss": 9.931,
+      "step": 20
+    },
+    {
+      "epoch": 0.017777777777777778,
+      "grad_norm": 29.342676162719727,
+      "learning_rate": 4.913333333333334e-05,
+      "loss": 7.8363,
+      "step": 40
+    },
+    {
+      "epoch": 0.02666666666666667,
+      "grad_norm": 30.171451568603516,
+      "learning_rate": 4.868888888888889e-05,
+      "loss": 5.9641,
+      "step": 60
+    },
+    {
+      "epoch": 0.035555555555555556,
+      "grad_norm": 27.37605857849121,
+      "learning_rate": 4.824444444444445e-05,
+      "loss": 4.3778,
+      "step": 80
+    },
+    {
+      "epoch": 0.044444444444444446,
+      "grad_norm": 9.228110313415527,
+      "learning_rate": 4.78e-05,
+      "loss": 3.2025,
+      "step": 100
+    },
+    {
+      "epoch": 0.05333333333333334,
+      "grad_norm": 4.524937152862549,
+      "learning_rate": 4.7355555555555555e-05,
+      "loss": 2.8068,
+      "step": 120
+    },
+    {
+      "epoch": 0.06222222222222222,
+      "grad_norm": 2.696683168411255,
+      "learning_rate": 4.6911111111111114e-05,
+      "loss": 2.7047,
+      "step": 140
+    },
+    {
+      "epoch": 0.07111111111111111,
+      "grad_norm": 3.652080535888672,
+      "learning_rate": 4.646666666666667e-05,
+      "loss": 2.5618,
+      "step": 160
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 3.995697021484375,
+      "learning_rate": 4.602222222222222e-05,
+      "loss": 2.556,
+      "step": 180
+    },
+    {
+      "epoch": 0.08888888888888889,
+      "grad_norm": 4.581667423248291,
+      "learning_rate": 4.557777777777778e-05,
+      "loss": 2.4783,
+      "step": 200
+    },
+    {
+      "epoch": 0.09777777777777778,
+      "grad_norm": 2.2177352905273438,
+      "learning_rate": 4.513333333333333e-05,
+      "loss": 2.538,
+      "step": 220
+    },
+    {
+      "epoch": 0.10666666666666667,
+      "grad_norm": 26.386642456054688,
+      "learning_rate": 4.468888888888889e-05,
+      "loss": 2.4825,
+      "step": 240
+    },
+    {
+      "epoch": 0.11555555555555555,
+      "grad_norm": 3.9231057167053223,
+      "learning_rate": 4.424444444444444e-05,
+      "loss": 2.5041,
+      "step": 260
+    },
+    {
+      "epoch": 0.12444444444444444,
+      "grad_norm": 11.716802597045898,
+      "learning_rate": 4.38e-05,
+      "loss": 2.4963,
+      "step": 280
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 9.851195335388184,
+      "learning_rate": 4.335555555555556e-05,
+      "loss": 2.3346,
+      "step": 300
+    },
+    {
+      "epoch": 0.14222222222222222,
+      "grad_norm": 43.87732696533203,
+      "learning_rate": 4.291111111111111e-05,
+      "loss": 2.5413,
+      "step": 320
+    },
+    {
+      "epoch": 0.1511111111111111,
+      "grad_norm": 11.769241333007812,
+      "learning_rate": 4.246666666666667e-05,
+      "loss": 2.5451,
+      "step": 340
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 5.8793463706970215,
+      "learning_rate": 4.2022222222222223e-05,
+      "loss": 2.3624,
+      "step": 360
+    },
+    {
+      "epoch": 0.1688888888888889,
+      "grad_norm": 2.2031548023223877,
+      "learning_rate": 4.157777777777778e-05,
+      "loss": 2.3517,
+      "step": 380
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 1.8488436937332153,
+      "learning_rate": 4.1133333333333335e-05,
+      "loss": 2.3944,
+      "step": 400
+    },
+    {
+      "epoch": 0.18666666666666668,
+      "grad_norm": 2.724890947341919,
+      "learning_rate": 4.0688888888888894e-05,
+      "loss": 2.3899,
+      "step": 420
+    },
+    {
+      "epoch": 0.19555555555555557,
+      "grad_norm": 27.8278751373291,
+      "learning_rate": 4.0244444444444446e-05,
+      "loss": 2.4001,
+      "step": 440
+    },
+    {
+      "epoch": 0.20444444444444446,
+      "grad_norm": 3.687330961227417,
+      "learning_rate": 3.9800000000000005e-05,
+      "loss": 2.4406,
+      "step": 460
+    },
+    {
+      "epoch": 0.21333333333333335,
+      "grad_norm": 12.961912155151367,
+      "learning_rate": 3.935555555555556e-05,
+      "loss": 2.4021,
+      "step": 480
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 2.5192787647247314,
+      "learning_rate": 3.8911111111111117e-05,
+      "loss": 2.345,
+      "step": 500
+    },
+    {
+      "epoch": 0.2311111111111111,
+      "grad_norm": 2.8883752822875977,
+      "learning_rate": 3.846666666666667e-05,
+      "loss": 2.2426,
+      "step": 520
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 3.0005998611450195,
+      "learning_rate": 3.802222222222223e-05,
+      "loss": 2.2591,
+      "step": 540
+    },
+    {
+      "epoch": 0.24888888888888888,
+      "grad_norm": 1.405104398727417,
+      "learning_rate": 3.757777777777778e-05,
+      "loss": 2.0771,
+      "step": 560
+    },
+    {
+      "epoch": 0.2577777777777778,
+      "grad_norm": 1.6733466386795044,
+      "learning_rate": 3.713333333333334e-05,
+      "loss": 2.031,
+      "step": 580
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 1.3471635580062866,
+      "learning_rate": 3.668888888888889e-05,
+      "loss": 2.0749,
+      "step": 600
+    },
+    {
+      "epoch": 0.27555555555555555,
+      "grad_norm": 1.6390342712402344,
+      "learning_rate": 3.624444444444445e-05,
+      "loss": 2.0469,
+      "step": 620
+    },
+    {
+      "epoch": 0.28444444444444444,
+      "grad_norm": 1.3594388961791992,
+      "learning_rate": 3.58e-05,
+      "loss": 2.0836,
+      "step": 640
+    },
+    {
+      "epoch": 0.29333333333333333,
+      "grad_norm": 1.5752489566802979,
+      "learning_rate": 3.5355555555555555e-05,
+      "loss": 2.0011,
+      "step": 660
+    },
+    {
+      "epoch": 0.3022222222222222,
+      "grad_norm": 2.0753142833709717,
+      "learning_rate": 3.4911111111111114e-05,
+      "loss": 2.1448,
+      "step": 680
+    },
+    {
+      "epoch": 0.3111111111111111,
+      "grad_norm": 1.4433261156082153,
+      "learning_rate": 3.4466666666666666e-05,
+      "loss": 1.9837,
+      "step": 700
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 1.2903432846069336,
+      "learning_rate": 3.402222222222222e-05,
+      "loss": 2.045,
+      "step": 720
+    },
+    {
+      "epoch": 0.3288888888888889,
+      "grad_norm": 1.120526671409607,
+      "learning_rate": 3.357777777777778e-05,
+      "loss": 2.0112,
+      "step": 740
+    },
+    {
+      "epoch": 0.3377777777777778,
+      "grad_norm": 1.4709597826004028,
+      "learning_rate": 3.313333333333333e-05,
+      "loss": 1.9936,
+      "step": 760
+    },
+    {
+      "epoch": 0.3466666666666667,
+      "grad_norm": 1.3369895219802856,
+      "learning_rate": 3.268888888888889e-05,
+      "loss": 1.9659,
+      "step": 780
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 1.1599270105361938,
+      "learning_rate": 3.224444444444444e-05,
+      "loss": 1.9923,
+      "step": 800
+    },
+    {
+      "epoch": 0.36444444444444446,
+      "grad_norm": 1.6374409198760986,
+      "learning_rate": 3.18e-05,
+      "loss": 1.9951,
+      "step": 820
+    },
+    {
+      "epoch": 0.37333333333333335,
+      "grad_norm": 1.8066844940185547,
+      "learning_rate": 3.135555555555555e-05,
+      "loss": 2.0328,
+      "step": 840
+    },
+    {
+      "epoch": 0.38222222222222224,
+      "grad_norm": 2.726418972015381,
+      "learning_rate": 3.091111111111111e-05,
+      "loss": 3.0763,
+      "step": 860
+    },
+    {
+      "epoch": 0.39111111111111113,
+      "grad_norm": 1.5949397087097168,
+      "learning_rate": 3.0466666666666664e-05,
+      "loss": 2.0093,
+      "step": 880
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 2.8492074012756348,
+      "learning_rate": 3.0022222222222223e-05,
+      "loss": 2.017,
+      "step": 900
+    },
+    {
+      "epoch": 0.4088888888888889,
+      "grad_norm": 1.2635244131088257,
+      "learning_rate": 2.9577777777777775e-05,
+      "loss": 2.0231,
+      "step": 920
+    },
+    {
+      "epoch": 0.4177777777777778,
+      "grad_norm": 2.1309561729431152,
+      "learning_rate": 2.9133333333333334e-05,
+      "loss": 2.1117,
+      "step": 940
+    },
+    {
+      "epoch": 0.4266666666666667,
+      "grad_norm": 39.7406005859375,
+      "learning_rate": 2.8688888888888894e-05,
+      "loss": 2.0463,
+      "step": 960
+    },
+    {
+      "epoch": 0.43555555555555553,
+      "grad_norm": 1.5083988904953003,
+      "learning_rate": 2.8244444444444446e-05,
+      "loss": 2.0512,
+      "step": 980
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 1.9805152416229248,
+      "learning_rate": 2.7800000000000005e-05,
+      "loss": 2.0151,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4533333333333333,
+      "grad_norm": 1.4856139421463013,
+      "learning_rate": 2.7355555555555557e-05,
+      "loss": 1.9718,
+      "step": 1020
+    },
+    {
+      "epoch": 0.4622222222222222,
+      "grad_norm": 1.5908904075622559,
+      "learning_rate": 2.6911111111111116e-05,
+      "loss": 1.9655,
+      "step": 1040
+    },
+    {
+      "epoch": 0.4711111111111111,
+      "grad_norm": 1.5324510335922241,
+      "learning_rate": 2.646666666666667e-05,
+      "loss": 1.93,
+      "step": 1060
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.6578165292739868,
+      "learning_rate": 2.6022222222222224e-05,
+      "loss": 1.9815,
+      "step": 1080
+    },
+    {
+      "epoch": 0.4888888888888889,
+      "grad_norm": 1.9318180084228516,
+      "learning_rate": 2.557777777777778e-05,
+      "loss": 1.9186,
+      "step": 1100
+    },
+    {
+      "epoch": 0.49777777777777776,
+      "grad_norm": 4.174930095672607,
+      "learning_rate": 2.5133333333333336e-05,
+      "loss": 1.9262,
+      "step": 1120
+    },
+    {
+      "epoch": 0.5066666666666667,
+      "grad_norm": 43.6900520324707,
+      "learning_rate": 2.4688888888888888e-05,
+      "loss": 1.9956,
+      "step": 1140
+    },
+    {
+      "epoch": 0.5155555555555555,
+      "grad_norm": 1.4572045803070068,
+      "learning_rate": 2.4244444444444443e-05,
+      "loss": 1.9548,
+      "step": 1160
+    },
+    {
+      "epoch": 0.5244444444444445,
+      "grad_norm": 3.6926634311676025,
+      "learning_rate": 2.38e-05,
+      "loss": 1.9629,
+      "step": 1180
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 1.4738409519195557,
+      "learning_rate": 2.3355555555555555e-05,
+      "loss": 2.01,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5422222222222223,
+      "grad_norm": 2.078120231628418,
+      "learning_rate": 2.291111111111111e-05,
+      "loss": 2.0279,
+      "step": 1220
+    },
+    {
+      "epoch": 0.5511111111111111,
+      "grad_norm": 1.4616929292678833,
+      "learning_rate": 2.2466666666666666e-05,
+      "loss": 1.9756,
+      "step": 1240
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 1.8624461889266968,
+      "learning_rate": 2.2022222222222225e-05,
+      "loss": 1.9866,
+      "step": 1260
+    },
+    {
+      "epoch": 0.5688888888888889,
+      "grad_norm": 2.9291300773620605,
+      "learning_rate": 2.157777777777778e-05,
+      "loss": 2.0013,
+      "step": 1280
+    },
+    {
+      "epoch": 0.5777777777777777,
+      "grad_norm": 1.6963049173355103,
+      "learning_rate": 2.1133333333333337e-05,
+      "loss": 1.9604,
+      "step": 1300
+    },
+    {
+      "epoch": 0.5866666666666667,
+      "grad_norm": 4.135189056396484,
+      "learning_rate": 2.0688888888888892e-05,
+      "loss": 1.9069,
+      "step": 1320
+    },
+    {
+      "epoch": 0.5955555555555555,
+      "grad_norm": 1.9753628969192505,
+      "learning_rate": 2.0244444444444448e-05,
+      "loss": 1.8923,
+      "step": 1340
+    },
+    {
+      "epoch": 0.6044444444444445,
+      "grad_norm": 1.6017193794250488,
+      "learning_rate": 1.9800000000000004e-05,
+      "loss": 2.0029,
+      "step": 1360
+    },
+    {
+      "epoch": 0.6133333333333333,
+      "grad_norm": 10.354166030883789,
+      "learning_rate": 1.9355555555555556e-05,
+      "loss": 2.0181,
+      "step": 1380
+    },
+    {
+      "epoch": 0.6222222222222222,
+      "grad_norm": 1.5218088626861572,
+      "learning_rate": 1.891111111111111e-05,
+      "loss": 1.9389,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6311111111111111,
+      "grad_norm": 3.808530569076538,
+      "learning_rate": 1.8466666666666667e-05,
+      "loss": 2.0232,
+      "step": 1420
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 2.383110761642456,
+      "learning_rate": 1.8022222222222223e-05,
+      "loss": 2.0305,
+      "step": 1440
+    },
+    {
+      "epoch": 0.6488888888888888,
+      "grad_norm": 3.2716569900512695,
+      "learning_rate": 1.757777777777778e-05,
+      "loss": 2.0708,
+      "step": 1460
+    },
+    {
+      "epoch": 0.6577777777777778,
+      "grad_norm": 28.891468048095703,
+      "learning_rate": 1.7133333333333334e-05,
+      "loss": 2.2954,
+      "step": 1480
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 2.7744553089141846,
+      "learning_rate": 1.668888888888889e-05,
+      "loss": 2.1343,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6755555555555556,
+      "grad_norm": 3.43209171295166,
+      "learning_rate": 1.6244444444444446e-05,
+      "loss": 2.1159,
+      "step": 1520
+    },
+    {
+      "epoch": 0.6844444444444444,
+      "grad_norm": 1.6540969610214233,
+      "learning_rate": 1.58e-05,
+      "loss": 1.9926,
+      "step": 1540
+    },
+    {
+      "epoch": 0.6933333333333334,
+      "grad_norm": 1.8530590534210205,
+      "learning_rate": 1.5355555555555557e-05,
+      "loss": 1.9936,
+      "step": 1560
+    },
+    {
+      "epoch": 0.7022222222222222,
+      "grad_norm": 8.5066556930542,
+      "learning_rate": 1.4911111111111113e-05,
+      "loss": 2.04,
+      "step": 1580
+    },
+    {
+      "epoch": 0.7111111111111111,
+      "grad_norm": 3.0773701667785645,
+      "learning_rate": 1.4466666666666667e-05,
+      "loss": 2.0861,
+      "step": 1600
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.7106276750564575,
+      "learning_rate": 1.4022222222222222e-05,
+      "loss": 1.9936,
+      "step": 1620
+    },
+    {
+      "epoch": 0.7288888888888889,
+      "grad_norm": 8.246357917785645,
+      "learning_rate": 1.3577777777777778e-05,
+      "loss": 2.0049,
+      "step": 1640
+    },
+    {
+      "epoch": 0.7377777777777778,
+      "grad_norm": 1.527330756187439,
+      "learning_rate": 1.3133333333333334e-05,
+      "loss": 1.9879,
+      "step": 1660
+    },
+    {
+      "epoch": 0.7466666666666667,
+      "grad_norm": 1.4043477773666382,
+      "learning_rate": 1.268888888888889e-05,
+      "loss": 1.952,
+      "step": 1680
+    },
+    {
+      "epoch": 0.7555555555555555,
+      "grad_norm": 1.3344300985336304,
+      "learning_rate": 1.2244444444444445e-05,
+      "loss": 2.0598,
+      "step": 1700
+    },
+    {
+      "epoch": 0.7644444444444445,
+      "grad_norm": 1.5034434795379639,
+      "learning_rate": 1.18e-05,
+      "loss": 2.0192,
+      "step": 1720
+    },
+    {
+      "epoch": 0.7733333333333333,
+      "grad_norm": 1.9273996353149414,
+      "learning_rate": 1.1355555555555556e-05,
+      "loss": 1.9587,
+      "step": 1740
+    },
+    {
+      "epoch": 0.7822222222222223,
+      "grad_norm": 1.5485738515853882,
+      "learning_rate": 1.0911111111111112e-05,
+      "loss": 2.0159,
+      "step": 1760
+    },
+    {
+      "epoch": 0.7911111111111111,
+      "grad_norm": 2.0449929237365723,
+      "learning_rate": 1.0466666666666668e-05,
+      "loss": 1.9578,
+      "step": 1780
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.3243677616119385,
+      "learning_rate": 1.0022222222222223e-05,
+      "loss": 1.9961,
+      "step": 1800
+    },
+    {
+      "epoch": 0.8088888888888889,
+      "grad_norm": 2.067218542098999,
+      "learning_rate": 9.577777777777779e-06,
+      "loss": 1.9834,
+      "step": 1820
+    },
+    {
+      "epoch": 0.8177777777777778,
+      "grad_norm": 1.5382347106933594,
+      "learning_rate": 9.133333333333335e-06,
+      "loss": 1.9732,
+      "step": 1840
+    },
+    {
+      "epoch": 0.8266666666666667,
+      "grad_norm": 1.530358910560608,
+      "learning_rate": 8.68888888888889e-06,
+      "loss": 1.92,
+      "step": 1860
+    },
+    {
+      "epoch": 0.8355555555555556,
+      "grad_norm": 1.3091119527816772,
+      "learning_rate": 8.244444444444444e-06,
+      "loss": 1.9412,
+      "step": 1880
+    },
+    {
+      "epoch": 0.8444444444444444,
+      "grad_norm": 1.3711830377578735,
+      "learning_rate": 7.8e-06,
+      "loss": 1.9652,
+      "step": 1900
+    },
+    {
+      "epoch": 0.8533333333333334,
+      "grad_norm": 1.6069121360778809,
+      "learning_rate": 7.3555555555555555e-06,
+      "loss": 2.1216,
+      "step": 1920
+    },
+    {
+      "epoch": 0.8622222222222222,
+      "grad_norm": 2.2424278259277344,
+      "learning_rate": 6.911111111111111e-06,
+      "loss": 1.9446,
+      "step": 1940
+    },
+    {
+      "epoch": 0.8711111111111111,
+      "grad_norm": 1.6427901983261108,
+      "learning_rate": 6.466666666666667e-06,
+      "loss": 1.9955,
+      "step": 1960
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 2.817962646484375,
+      "learning_rate": 6.0222222222222225e-06,
+      "loss": 1.9938,
+      "step": 1980
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 2.454206705093384,
+      "learning_rate": 5.577777777777778e-06,
+      "loss": 1.9823,
+      "step": 2000
+    },
+    {
+      "epoch": 0.8977777777777778,
+      "grad_norm": 1.576393961906433,
+      "learning_rate": 5.133333333333334e-06,
+      "loss": 1.9354,
+      "step": 2020
+    },
+    {
+      "epoch": 0.9066666666666666,
+      "grad_norm": 2.2149758338928223,
+      "learning_rate": 4.6888888888888895e-06,
+      "loss": 1.9377,
+      "step": 2040
+    },
+    {
+      "epoch": 0.9155555555555556,
+      "grad_norm": 9.432432174682617,
+      "learning_rate": 4.244444444444444e-06,
+      "loss": 1.967,
+      "step": 2060
+    },
+    {
+      "epoch": 0.9244444444444444,
+      "grad_norm": 1.6415536403656006,
+      "learning_rate": 3.8e-06,
+      "loss": 2.0043,
+      "step": 2080
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 1.3895797729492188,
+      "learning_rate": 3.3555555555555557e-06,
+      "loss": 1.9757,
+      "step": 2100
+    },
+    {
+      "epoch": 0.9422222222222222,
+      "grad_norm": 1.7504130601882935,
+      "learning_rate": 2.9111111111111114e-06,
+      "loss": 1.9506,
+      "step": 2120
+    },
+    {
+      "epoch": 0.9511111111111111,
+      "grad_norm": 2.290969133377075,
+      "learning_rate": 2.4666666666666666e-06,
+      "loss": 1.9525,
+      "step": 2140
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 1.4756875038146973,
+      "learning_rate": 2.0222222222222223e-06,
+      "loss": 2.0076,
+      "step": 2160
+    },
+    {
+      "epoch": 0.9688888888888889,
+      "grad_norm": 9.54588794708252,
+      "learning_rate": 1.577777777777778e-06,
+      "loss": 1.9536,
+      "step": 2180
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 7.211913108825684,
+      "learning_rate": 1.1333333333333334e-06,
+      "loss": 1.9539,
+      "step": 2200
+    },
+    {
+      "epoch": 0.9866666666666667,
+      "grad_norm": 2.0712270736694336,
+      "learning_rate": 6.888888888888889e-07,
+      "loss": 2.0026,
+      "step": 2220
+    },
+    {
+      "epoch": 0.9955555555555555,
+      "grad_norm": 1.6621143817901611,
+      "learning_rate": 2.4444444444444445e-07,
+      "loss": 1.9709,
+      "step": 2240
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 2250,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1393197318144000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2250/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e80e264647457cd090f5619636a58dde3a50daacc43d528c97b1cd53b5c0a888
+size 5240

config.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "early_stopping": null,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "force_bos_token_to_be_generated": true,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_decoder": true,
+  "is_encoder_decoder": false,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "length_penalty": null,
+  "max_length": null,
+  "max_position_embeddings": 1024,
+  "min_length": null,
+  "model_type": "bart",
+  "no_repeat_ngram_size": null,
+  "normalize_before": false,
+  "num_beams": null,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "prefix": " ",
+  "scale_embedding": false,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 142,
+      "min_length": 56,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
+  "use_cache": true,
+  "vocab_size": 50264
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "early_stopping": true,
+  "eos_token_id": 2,
+  "forced_bos_token_id": 0,
+  "forced_eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 142,
+  "min_length": 56,
+  "no_repeat_ngram_size": 3,
+  "num_beams": 4,
+  "pad_token_id": 1,
+  "transformers_version": "4.52.4"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74d34ed7efb5a94ca559d7d28c72f5c61aff49639cb20fdf3774e3fae7b355d7
+size 1016369648

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "BartTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff