diff --git a/checkpoint-10624/config.json b/checkpoint-10624/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-10624/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-10624/generation_config.json b/checkpoint-10624/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-10624/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-10624/model.safetensors b/checkpoint-10624/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b5f613df10d258ac6dd82ca27fdc95aba2c57351
--- /dev/null
+++ b/checkpoint-10624/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2a52a542fc460795d1eea30717f34069c2ac222ad447e5856a482d9f306f637
+size 242041896
diff --git a/checkpoint-10624/optimizer.pt b/checkpoint-10624/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..637759d9389805f0d6c2590d1fe9cec53626d32b
--- /dev/null
+++ b/checkpoint-10624/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:626a689b9399ca5bc9be0ba4001343c82189ae6b5f17c2f154f5f420be8c8efd
+size 484163514
diff --git a/checkpoint-10624/rng_state.pth b/checkpoint-10624/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..80e9ce9dc2eed45c4ad0276ba2aea9f9d62d4822
--- /dev/null
+++ b/checkpoint-10624/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:871dae08d4b0f588e7fb11dfff89a25046714f93a8b345b0a8564e6033959fb5
+size 14244
diff --git a/checkpoint-10624/scheduler.pt b/checkpoint-10624/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a0b2ee1e036e9fbdcb114f010b7a5bd9ebcaa47
--- /dev/null
+++ b/checkpoint-10624/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff5e9fe88ecbc6317ecb28b185c99a3462fbdc58b45639f3e0d8c2dc79b5584f
+size 1064
diff --git a/checkpoint-10624/trainer_state.json b/checkpoint-10624/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4487304eda7caacf671c1a6ef8aaa5346569f873
--- /dev/null
+++ b/checkpoint-10624/trainer_state.json
@@ -0,0 +1,180 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 16.0,
+  "eval_steps": 500,
+  "global_step": 10624,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    },
+    {
+      "epoch": 13.55421686746988,
+      "grad_norm": 0.08162941783666611,
+      "learning_rate": 0.00016114457831325303,
+      "loss": 0.0248,
+      "step": 9000
+    },
+    {
+      "epoch": 14.30722891566265,
+      "grad_norm": 0.0502689927816391,
+      "learning_rate": 0.00014231927710843374,
+      "loss": 0.0242,
+      "step": 9500
+    },
+    {
+      "epoch": 15.060240963855422,
+      "grad_norm": 0.052483588457107544,
+      "learning_rate": 0.00012349397590361445,
+      "loss": 0.0245,
+      "step": 10000
+    },
+    {
+      "epoch": 15.813253012048193,
+      "grad_norm": 0.04214683175086975,
+      "learning_rate": 0.00010466867469879517,
+      "loss": 0.0238,
+      "step": 10500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5744176738074624.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-10624/training_args.bin b/checkpoint-10624/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-10624/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-11288/config.json b/checkpoint-11288/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-11288/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-11288/generation_config.json b/checkpoint-11288/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-11288/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-11288/model.safetensors b/checkpoint-11288/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..999ba4d5d35cfe6e8907002b37ef75cc39e3c0f6
--- /dev/null
+++ b/checkpoint-11288/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d30b7966eb336d8ee47c0a75a7ba2a9369f2b9f5ff64ec647694b39217a6fe3
+size 242041896
diff --git a/checkpoint-11288/optimizer.pt b/checkpoint-11288/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6296790a6b4555028724b89099ea4c5e6d441be0
--- /dev/null
+++ b/checkpoint-11288/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:225d5d0cebb7017617e05571854333032b6acc41d71a97baa451858b61dc93e9
+size 484163514
diff --git a/checkpoint-11288/rng_state.pth b/checkpoint-11288/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a5a878f5fb1255aa849f7f7458a72e804c6ad730
--- /dev/null
+++ b/checkpoint-11288/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea982c40bf13ee6d59e20a0c92fb57845229dff0ba14bb916750b0adb8f60d26
+size 14244
diff --git a/checkpoint-11288/scheduler.pt b/checkpoint-11288/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a60738b15b74fb7a574c18563e7a777c4d6e33b
--- /dev/null
+++ b/checkpoint-11288/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f49e004a60a12580012411533599c9840ef90bb62ac0e44a6af9f00aa574415
+size 1064
diff --git a/checkpoint-11288/trainer_state.json b/checkpoint-11288/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cac0194474ca437485e99478782ed879ca9b16aa
--- /dev/null
+++ b/checkpoint-11288/trainer_state.json
@@ -0,0 +1,187 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 17.0,
+  "eval_steps": 500,
+  "global_step": 11288,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    },
+    {
+      "epoch": 13.55421686746988,
+      "grad_norm": 0.08162941783666611,
+      "learning_rate": 0.00016114457831325303,
+      "loss": 0.0248,
+      "step": 9000
+    },
+    {
+      "epoch": 14.30722891566265,
+      "grad_norm": 0.0502689927816391,
+      "learning_rate": 0.00014231927710843374,
+      "loss": 0.0242,
+      "step": 9500
+    },
+    {
+      "epoch": 15.060240963855422,
+      "grad_norm": 0.052483588457107544,
+      "learning_rate": 0.00012349397590361445,
+      "loss": 0.0245,
+      "step": 10000
+    },
+    {
+      "epoch": 15.813253012048193,
+      "grad_norm": 0.04214683175086975,
+      "learning_rate": 0.00010466867469879517,
+      "loss": 0.0238,
+      "step": 10500
+    },
+    {
+      "epoch": 16.566265060240966,
+      "grad_norm": 0.03767360374331474,
+      "learning_rate": 8.58433734939759e-05,
+      "loss": 0.0239,
+      "step": 11000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6103187784204288.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-11288/training_args.bin b/checkpoint-11288/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-11288/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-11952/config.json b/checkpoint-11952/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-11952/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-11952/generation_config.json b/checkpoint-11952/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-11952/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-11952/model.safetensors b/checkpoint-11952/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..62eeb003177d9a78dd4d5c8a420cf61c9bd8d468
--- /dev/null
+++ b/checkpoint-11952/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eb96a9f08127ec7592e050ea89cd120320db5b74966dc5656609bbd208511c2
+size 242041896
diff --git a/checkpoint-11952/optimizer.pt b/checkpoint-11952/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b982611e2cadc740001eefe5114a8bdaef42804f
--- /dev/null
+++ b/checkpoint-11952/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9cce771a11a0f6043f4a77c724a7499306145b423778e42ee3413da80892d3d
+size 484163514
diff --git a/checkpoint-11952/rng_state.pth b/checkpoint-11952/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d8088e255e0e2a38088857ba32eb7a2168a175d9
--- /dev/null
+++ b/checkpoint-11952/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c503ee05f88e202769d6afdb9591580059e32575c1447b22ecf57bac3a49734
+size 14244
diff --git a/checkpoint-11952/scheduler.pt b/checkpoint-11952/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c73e4454f93b631ca00379de0f530371c0e6e2ab
--- /dev/null
+++ b/checkpoint-11952/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22cc1df3a420b1528700cb2c2255107d6d7b4d17b007299a04e9c786c3c472a4
+size 1064
diff --git a/checkpoint-11952/trainer_state.json b/checkpoint-11952/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb48e71b44fd2e762f85d07b6602c7f684cc8f
--- /dev/null
+++ b/checkpoint-11952/trainer_state.json
@@ -0,0 +1,194 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 18.0,
+  "eval_steps": 500,
+  "global_step": 11952,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    },
+    {
+      "epoch": 13.55421686746988,
+      "grad_norm": 0.08162941783666611,
+      "learning_rate": 0.00016114457831325303,
+      "loss": 0.0248,
+      "step": 9000
+    },
+    {
+      "epoch": 14.30722891566265,
+      "grad_norm": 0.0502689927816391,
+      "learning_rate": 0.00014231927710843374,
+      "loss": 0.0242,
+      "step": 9500
+    },
+    {
+      "epoch": 15.060240963855422,
+      "grad_norm": 0.052483588457107544,
+      "learning_rate": 0.00012349397590361445,
+      "loss": 0.0245,
+      "step": 10000
+    },
+    {
+      "epoch": 15.813253012048193,
+      "grad_norm": 0.04214683175086975,
+      "learning_rate": 0.00010466867469879517,
+      "loss": 0.0238,
+      "step": 10500
+    },
+    {
+      "epoch": 16.566265060240966,
+      "grad_norm": 0.03767360374331474,
+      "learning_rate": 8.58433734939759e-05,
+      "loss": 0.0239,
+      "step": 11000
+    },
+    {
+      "epoch": 17.319277108433734,
+      "grad_norm": 0.04902500659227371,
+      "learning_rate": 6.701807228915662e-05,
+      "loss": 0.0234,
+      "step": 11500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6462198830333952.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-11952/training_args.bin b/checkpoint-11952/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-11952/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-12616/config.json b/checkpoint-12616/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-12616/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-12616/generation_config.json b/checkpoint-12616/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-12616/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-12616/model.safetensors b/checkpoint-12616/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..72c97f274d8de5ddeff84f8fcac255df2538ca7b
--- /dev/null
+++ b/checkpoint-12616/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e6b7740a417775a39ca07bbcb3f19c41d1de00ef5d3c600eb612b81f032d12a
+size 242041896
diff --git a/checkpoint-12616/optimizer.pt b/checkpoint-12616/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb1186611179290409a3e793d57b24b4c4038e6e
--- /dev/null
+++ b/checkpoint-12616/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:298fd0e26e0e50e6407d3e5b11ce54997833eae70441b852ffc0ce668a639ccc
+size 484163514
diff --git a/checkpoint-12616/rng_state.pth b/checkpoint-12616/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0862a9631ba1d610bab2e11aa1a7bef4b80c2f1a
--- /dev/null
+++ b/checkpoint-12616/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21b5b2d0d4ef3a10347f33189f220aa44bc97dadc5d71a227e0fb3e22c380ff6
+size 14244
diff --git a/checkpoint-12616/scheduler.pt b/checkpoint-12616/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83ee3334a46b18a95261753b2f3689451785b2eb
--- /dev/null
+++ b/checkpoint-12616/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:822b1fd7a3f099d382f34877c8a7ac2274ddcadf9bb3122afba35168bc2788e6
+size 1064
diff --git a/checkpoint-12616/trainer_state.json b/checkpoint-12616/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..de0ed48a1636858d1034dad4d77886834a53b9eb
--- /dev/null
+++ b/checkpoint-12616/trainer_state.json
@@ -0,0 +1,208 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 19.0,
+  "eval_steps": 500,
+  "global_step": 12616,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    },
+    {
+      "epoch": 13.55421686746988,
+      "grad_norm": 0.08162941783666611,
+      "learning_rate": 0.00016114457831325303,
+      "loss": 0.0248,
+      "step": 9000
+    },
+    {
+      "epoch": 14.30722891566265,
+      "grad_norm": 0.0502689927816391,
+      "learning_rate": 0.00014231927710843374,
+      "loss": 0.0242,
+      "step": 9500
+    },
+    {
+      "epoch": 15.060240963855422,
+      "grad_norm": 0.052483588457107544,
+      "learning_rate": 0.00012349397590361445,
+      "loss": 0.0245,
+      "step": 10000
+    },
+    {
+      "epoch": 15.813253012048193,
+      "grad_norm": 0.04214683175086975,
+      "learning_rate": 0.00010466867469879517,
+      "loss": 0.0238,
+      "step": 10500
+    },
+    {
+      "epoch": 16.566265060240966,
+      "grad_norm": 0.03767360374331474,
+      "learning_rate": 8.58433734939759e-05,
+      "loss": 0.0239,
+      "step": 11000
+    },
+    {
+      "epoch": 17.319277108433734,
+      "grad_norm": 0.04902500659227371,
+      "learning_rate": 6.701807228915662e-05,
+      "loss": 0.0234,
+      "step": 11500
+    },
+    {
+      "epoch": 18.072289156626507,
+      "grad_norm": 0.058824148029088974,
+      "learning_rate": 4.8192771084337354e-05,
+      "loss": 0.0232,
+      "step": 12000
+    },
+    {
+      "epoch": 18.825301204819276,
+      "grad_norm": 0.06361762434244156,
+      "learning_rate": 2.9367469879518075e-05,
+      "loss": 0.0231,
+      "step": 12500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6821209876463616.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-12616/training_args.bin b/checkpoint-12616/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-12616/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-1328/config.json b/checkpoint-1328/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-1328/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-1328/generation_config.json b/checkpoint-1328/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-1328/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-1328/model.safetensors b/checkpoint-1328/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5199b8eef8710b41a0f8805807204836d91faed4
--- /dev/null
+++ b/checkpoint-1328/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cb90960a87bb689d468344aeff8b4e9d5590b017945b0a486ddb999a6542813
+size 242041896
diff --git a/checkpoint-1328/optimizer.pt b/checkpoint-1328/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d42a316334aec1d47f29992b951b97c311b05bc
--- /dev/null
+++ b/checkpoint-1328/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5100188e6dcc93a1184d247117fb0df100240535a266ec5f78e8b8bd3943062e
+size 484163514
diff --git a/checkpoint-1328/rng_state.pth b/checkpoint-1328/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e3ba157c04424f8d197eac69f4dc419170034322
--- /dev/null
+++ b/checkpoint-1328/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8737df5603a32aae5e5e9c2da6de40078a49291ae4477cd8b2ae7c3f57890fbc
+size 14244
diff --git a/checkpoint-1328/scheduler.pt b/checkpoint-1328/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62518a06aaada65952059dc1dc9b4587f3bde683
--- /dev/null
+++ b/checkpoint-1328/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b51d887a83d6c3823b34be1c27e0a48d7412ba51ea0a6f9ef63aa05d8faea20
+size 1064
diff --git a/checkpoint-1328/trainer_state.json b/checkpoint-1328/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0e04ea27f6f756183b8dbdecfa9d850e34fda7b8
--- /dev/null
+++ b/checkpoint-1328/trainer_state.json
@@ -0,0 +1,47 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1328,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 718022092259328.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-1328/training_args.bin b/checkpoint-1328/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-1328/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-13280/config.json b/checkpoint-13280/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-13280/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-13280/generation_config.json b/checkpoint-13280/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-13280/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-13280/model.safetensors b/checkpoint-13280/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b80690044e61d360fd0e7f2da5549a764a1fdac4
--- /dev/null
+++ b/checkpoint-13280/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8bd36f4c4292c3d390f593bc70a7f6421f9732c1adde462e862d0e1dd1f20c9
+size 242041896
diff --git a/checkpoint-13280/optimizer.pt b/checkpoint-13280/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08968775aecea7b21468be71c46e9bf2b2c8d240
--- /dev/null
+++ b/checkpoint-13280/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:301f1132d8ad5001f1b12cf4300d5b23653e59d8885b0c5005bf916df091417c
+size 484163514
diff --git a/checkpoint-13280/rng_state.pth b/checkpoint-13280/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8a2972c81a759bd6cc347465ee6885c690a43978
--- /dev/null
+++ b/checkpoint-13280/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb1f5a9993254e34859ad003f7605d6f3577e096450d91bae1e372fe7a69711b
+size 14244
diff --git a/checkpoint-13280/scheduler.pt b/checkpoint-13280/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c31f0c3d7bbfe5d9013f0c947a69f06f7bfe724
--- /dev/null
+++ b/checkpoint-13280/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cd06fe26347b0405fefe5ed1eea662b1479b2b2b4875e24a34f8e1b73b73ad7
+size 1064
diff --git a/checkpoint-13280/trainer_state.json b/checkpoint-13280/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f57e8c02d4727b8bf83abe190f348c99b39a51e8
--- /dev/null
+++ b/checkpoint-13280/trainer_state.json
@@ -0,0 +1,215 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 20.0,
+  "eval_steps": 500,
+  "global_step": 13280,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    },
+    {
+      "epoch": 13.55421686746988,
+      "grad_norm": 0.08162941783666611,
+      "learning_rate": 0.00016114457831325303,
+      "loss": 0.0248,
+      "step": 9000
+    },
+    {
+      "epoch": 14.30722891566265,
+      "grad_norm": 0.0502689927816391,
+      "learning_rate": 0.00014231927710843374,
+      "loss": 0.0242,
+      "step": 9500
+    },
+    {
+      "epoch": 15.060240963855422,
+      "grad_norm": 0.052483588457107544,
+      "learning_rate": 0.00012349397590361445,
+      "loss": 0.0245,
+      "step": 10000
+    },
+    {
+      "epoch": 15.813253012048193,
+      "grad_norm": 0.04214683175086975,
+      "learning_rate": 0.00010466867469879517,
+      "loss": 0.0238,
+      "step": 10500
+    },
+    {
+      "epoch": 16.566265060240966,
+      "grad_norm": 0.03767360374331474,
+      "learning_rate": 8.58433734939759e-05,
+      "loss": 0.0239,
+      "step": 11000
+    },
+    {
+      "epoch": 17.319277108433734,
+      "grad_norm": 0.04902500659227371,
+      "learning_rate": 6.701807228915662e-05,
+      "loss": 0.0234,
+      "step": 11500
+    },
+    {
+      "epoch": 18.072289156626507,
+      "grad_norm": 0.058824148029088974,
+      "learning_rate": 4.8192771084337354e-05,
+      "loss": 0.0232,
+      "step": 12000
+    },
+    {
+      "epoch": 18.825301204819276,
+      "grad_norm": 0.06361762434244156,
+      "learning_rate": 2.9367469879518075e-05,
+      "loss": 0.0231,
+      "step": 12500
+    },
+    {
+      "epoch": 19.57831325301205,
+      "grad_norm": 0.053078796714544296,
+      "learning_rate": 1.0542168674698795e-05,
+      "loss": 0.0232,
+      "step": 13000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7180220922593280.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-13280/training_args.bin b/checkpoint-13280/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-13280/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-1992/config.json b/checkpoint-1992/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-1992/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-1992/generation_config.json b/checkpoint-1992/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-1992/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-1992/model.safetensors b/checkpoint-1992/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..319405e4e3719406d7e6a9684fd0549d5d73a0fb
--- /dev/null
+++ b/checkpoint-1992/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4654c9cf89a81b68bd35bdc6ee00aaf996858cbfd524ab9a2f899ba3c710dd9b
+size 242041896
diff --git a/checkpoint-1992/optimizer.pt b/checkpoint-1992/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab0daa0f04a8bbb18ab7b3298f1dd4fa35799458
--- /dev/null
+++ b/checkpoint-1992/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1926465b24f3299cef677e55de777f74ed47bceb1d2f671026fc7252dbe2335
+size 484163514
diff --git a/checkpoint-1992/rng_state.pth b/checkpoint-1992/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..781d63b0f677dcbb17abc3b3c80e53f6c5d686d0
--- /dev/null
+++ b/checkpoint-1992/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28b3210c6546f5c024fe5468a4c5cddf81630f8b46f6f1c93edbffea14133b6b
+size 14244
diff --git a/checkpoint-1992/scheduler.pt b/checkpoint-1992/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..490af9dd36e6e9f50ef3fac87482eea3974c47ca
--- /dev/null
+++ b/checkpoint-1992/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a73930a28f6f17579b171324c0dc859f039babe921c42761a59cde8ea4ef9213
+size 1064
diff --git a/checkpoint-1992/trainer_state.json b/checkpoint-1992/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c46ded255c5bde1c6e2d6c379bee1968e1c9ca21
--- /dev/null
+++ b/checkpoint-1992/trainer_state.json
@@ -0,0 +1,54 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1992,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1077033138388992.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-1992/training_args.bin b/checkpoint-1992/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-1992/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-2656/config.json b/checkpoint-2656/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-2656/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-2656/generation_config.json b/checkpoint-2656/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-2656/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-2656/model.safetensors b/checkpoint-2656/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..084ba2c29275f6618c0a9d6cb78be0e678360702
--- /dev/null
+++ b/checkpoint-2656/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4ec6681d5e4655771228f516ef82ae9953b4bd8a03023f89c01ef653ebd3c65
+size 242041896
diff --git a/checkpoint-2656/optimizer.pt b/checkpoint-2656/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..57d3ff2dbc91efde266de56e886c329e67552c07
--- /dev/null
+++ b/checkpoint-2656/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b307f8e49034866c5c015630f8fa654a331a9e803649ccf615b1b56d0516f90
+size 484163514
diff --git a/checkpoint-2656/rng_state.pth b/checkpoint-2656/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2d077571a7a87e97984796dbc7a5adceaf7a94b2
--- /dev/null
+++ b/checkpoint-2656/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae0f1d3440823ebea1f70cff469eda177d4a0550eadf34c3a2c8b334329912b3
+size 14244
diff --git a/checkpoint-2656/scheduler.pt b/checkpoint-2656/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..725bd0bd1e8e2bacca254cb8ec535d5d0f29e80d
--- /dev/null
+++ b/checkpoint-2656/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee111b2e58fd87c7cff54e4b9df7ecced7c7c69c404c5b2b50093b27271a3d80
+size 1064
diff --git a/checkpoint-2656/trainer_state.json b/checkpoint-2656/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7e457a4b2c127dfdea5d21780a1a791250bf38fe
--- /dev/null
+++ b/checkpoint-2656/trainer_state.json
@@ -0,0 +1,68 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 2656,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1436044184518656.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-2656/training_args.bin b/checkpoint-2656/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-2656/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-3320/config.json b/checkpoint-3320/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-3320/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-3320/generation_config.json b/checkpoint-3320/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-3320/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-3320/model.safetensors b/checkpoint-3320/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..945b6f3f4509cda778eef4d48e0b4dd0c4fd555f
--- /dev/null
+++ b/checkpoint-3320/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e7399036d6d5e0ff1e62004f221407f1fcbea3065f4fca268c8343ebd2d4a9a
+size 242041896
diff --git a/checkpoint-3320/optimizer.pt b/checkpoint-3320/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6af9b0da4e2c35e99f6d8f304b5169295a2d1fb9
--- /dev/null
+++ b/checkpoint-3320/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caa347079a7baa18cc423c737e9185fe57aaf20106ce7a5b3afd2f5b1c501d8f
+size 484163514
diff --git a/checkpoint-3320/rng_state.pth b/checkpoint-3320/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e6404db70c5ad1c3d0ae2bdabd947c93d0af3271
--- /dev/null
+++ b/checkpoint-3320/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:529b97068a3d90cb9fb6c7d90e901db3252ceebae3b8338675debc65f0a7d72d
+size 14244
diff --git a/checkpoint-3320/scheduler.pt b/checkpoint-3320/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0e6a0c462212ecc793cde8a2c265bdff33b76613
--- /dev/null
+++ b/checkpoint-3320/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b758b7d414e163d0c48d4e99ae5665ecea0166d393eeca607ecdd2a896818721
+size 1064
diff --git a/checkpoint-3320/trainer_state.json b/checkpoint-3320/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff7a6876cab5995ef2d8595440bf023ae0408c29
--- /dev/null
+++ b/checkpoint-3320/trainer_state.json
@@ -0,0 +1,75 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 3320,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1795055230648320.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-3320/training_args.bin b/checkpoint-3320/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-3320/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-3984/config.json b/checkpoint-3984/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-3984/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-3984/generation_config.json b/checkpoint-3984/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-3984/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-3984/model.safetensors b/checkpoint-3984/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30f7d99c37e674931b45c7e7dff4c20984d5344a
--- /dev/null
+++ b/checkpoint-3984/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08db43789d7b94431ebff25043dfb90f12efaceba68e72907f16f9e4cbb9b9b3
+size 242041896
diff --git a/checkpoint-3984/optimizer.pt b/checkpoint-3984/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a41b132e8c9887130752ceb3ed6599ff77b79c65
--- /dev/null
+++ b/checkpoint-3984/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e12588ec100a97e446d7a448bac840d4bd2ce689b5391cae9fd7706c164d88c
+size 484163514
diff --git a/checkpoint-3984/rng_state.pth b/checkpoint-3984/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..eb31094de8b73410ce44a3b8ea6902e33b005795
--- /dev/null
+++ b/checkpoint-3984/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f8dc8a7a89a3faf3bb85cdca2961459049b8b93a23c61b6d875073636018d37
+size 14244
diff --git a/checkpoint-3984/scheduler.pt b/checkpoint-3984/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7c9e610d77d7c8a192e66a871fd3cab8420fa63c
--- /dev/null
+++ b/checkpoint-3984/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55a38be13b64a4268dec8b12c25db249c63b5cb35f03790de5dba401337d59a7
+size 1064
diff --git a/checkpoint-3984/trainer_state.json b/checkpoint-3984/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..697ec9be4315a83dc20bb2110a83c719b8ee5114
--- /dev/null
+++ b/checkpoint-3984/trainer_state.json
@@ -0,0 +1,82 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 3984,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2154066276777984.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-3984/training_args.bin b/checkpoint-3984/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-3984/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-4648/config.json b/checkpoint-4648/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-4648/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-4648/generation_config.json b/checkpoint-4648/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-4648/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-4648/model.safetensors b/checkpoint-4648/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3636ed8559ceffa02f455161ac5c43b1a52a77ba
--- /dev/null
+++ b/checkpoint-4648/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:448ef945fa76cd4e82bb178701e8ae578edde09c47d80f194c7746380fa6a609
+size 242041896
diff --git a/checkpoint-4648/optimizer.pt b/checkpoint-4648/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c994b658ccf6b1e2cfacc574dcc3c524a2d54ac
--- /dev/null
+++ b/checkpoint-4648/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d1dec9842ed2d0bfbaa1b2b86332c91693557caa9658897c7b33fab6a538be4
+size 484163514
diff --git a/checkpoint-4648/rng_state.pth b/checkpoint-4648/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..65fb5db9edaf0180f96886d7effedef259c96945
--- /dev/null
+++ b/checkpoint-4648/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c17df5c2080c1049dea480106929daa9a55517c204856b809e27f5fcc16cebaa
+size 14244
diff --git a/checkpoint-4648/scheduler.pt b/checkpoint-4648/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1058f05f541befc59fb016b0f0b06fa83aa6babe
--- /dev/null
+++ b/checkpoint-4648/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5152b17c98368152e474b87d9c64ba6d97c8428f85637733e237f8b69b5bd937
+size 1064
diff --git a/checkpoint-4648/trainer_state.json b/checkpoint-4648/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c50dcc1da7697c8072a8c86dbe73e4c21d42bda3
--- /dev/null
+++ b/checkpoint-4648/trainer_state.json
@@ -0,0 +1,96 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 4648,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2513077322907648.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-4648/training_args.bin b/checkpoint-4648/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-4648/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-5312/config.json b/checkpoint-5312/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-5312/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-5312/generation_config.json b/checkpoint-5312/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-5312/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-5312/model.safetensors b/checkpoint-5312/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..930e7d1942ca313062a9f932fa660fadbbd18578
--- /dev/null
+++ b/checkpoint-5312/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97885c22d9eb94e533d5173e0636885717a8db079549ca29089a0c10818e46d5
+size 242041896
diff --git a/checkpoint-5312/optimizer.pt b/checkpoint-5312/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73d0750946142b371cf06969e6dd4a8dc3b56b54
--- /dev/null
+++ b/checkpoint-5312/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6634dd9a4e7c4067d256a6e4da06a62dd827d1a796294a721d79d66e157cc993
+size 484163514
diff --git a/checkpoint-5312/rng_state.pth b/checkpoint-5312/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..31ee14ad4e013587e6348ff4ed1d931d8779518f
--- /dev/null
+++ b/checkpoint-5312/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77c239e7c3f387e4f5cbfdc069a340b4dc4816ee410605f4c761fdd1968cba1e
+size 14244
diff --git a/checkpoint-5312/scheduler.pt b/checkpoint-5312/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d723c517c6ddf090d224ad81c543d0b1d41b9d4d
--- /dev/null
+++ b/checkpoint-5312/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2df5eec69e36f13fe010830741b5864241e4d5169d6ce46963c1274f11e51521
+size 1064
diff --git a/checkpoint-5312/trainer_state.json b/checkpoint-5312/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..46b6a7daf487fb2dfb7d6b25ad9cc669dfefcecc
--- /dev/null
+++ b/checkpoint-5312/trainer_state.json
@@ -0,0 +1,103 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 5312,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2872088369037312.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-5312/training_args.bin b/checkpoint-5312/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-5312/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-5976/config.json b/checkpoint-5976/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-5976/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-5976/generation_config.json b/checkpoint-5976/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-5976/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-5976/model.safetensors b/checkpoint-5976/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..628f01ba6e3b9876af051d0c08ea803a2ec6d3c0
--- /dev/null
+++ b/checkpoint-5976/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:311cbd6d4cd82234eb76ff9c9ce6fd953e257c6ae5f7dcbb286977d0b335de54
+size 242041896
diff --git a/checkpoint-5976/optimizer.pt b/checkpoint-5976/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8354df8c03d621c778ecff9e3f7f22a4fb54e344
--- /dev/null
+++ b/checkpoint-5976/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:198916391e1273de56278390d5acf7c8d1a34a75617147c870c0d8316b1c0139
+size 484163514
diff --git a/checkpoint-5976/rng_state.pth b/checkpoint-5976/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..890b864a7d06688a6633f54f2661ab99eb956388
--- /dev/null
+++ b/checkpoint-5976/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae9e66325b1927e9d04818200f5bc6cf72234ba0e00a7c2866209e02546bc09b
+size 14244
diff --git a/checkpoint-5976/scheduler.pt b/checkpoint-5976/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b36338911aeeb839c0859868b91ed783ca7371c
--- /dev/null
+++ b/checkpoint-5976/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:917a41ff3e07e604221fa6f89f369743864ab7c91a1a30a583d4e4de1beb4787
+size 1064
diff --git a/checkpoint-5976/trainer_state.json b/checkpoint-5976/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..643d274c0f7d1dbe18967c86f37d85447e38004b
--- /dev/null
+++ b/checkpoint-5976/trainer_state.json
@@ -0,0 +1,110 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.0,
+  "eval_steps": 500,
+  "global_step": 5976,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3231099415166976.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-5976/training_args.bin b/checkpoint-5976/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-5976/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-664/config.json b/checkpoint-664/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-664/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-664/generation_config.json b/checkpoint-664/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-664/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-664/model.safetensors b/checkpoint-664/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bb406f540db284f35bff6bc0300a3d1215ceb35f
--- /dev/null
+++ b/checkpoint-664/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5326698f51b49d4338344b480ff4e7e8c672f556c44750b3a50c5d1b162f97a2
+size 242041896
diff --git a/checkpoint-664/optimizer.pt b/checkpoint-664/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3e08cfc50d5ef01f0648420673a4eb7570230cb
--- /dev/null
+++ b/checkpoint-664/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab5d4848a96aed8a1fff784a146e81e46205935859ca1ecbb7603ddeb35b3e84
+size 484163514
diff --git a/checkpoint-664/rng_state.pth b/checkpoint-664/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..256830d8e6ffd79234fc150e5427c5a89677b4a4
--- /dev/null
+++ b/checkpoint-664/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:759c31a50d8add4657e65be041371e769aefb366f904d90ba4259345c29f9990
+size 14244
diff --git a/checkpoint-664/scheduler.pt b/checkpoint-664/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..338b16e86bfda1388fb16c465ac25c64ea03038e
--- /dev/null
+++ b/checkpoint-664/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41cecb99d54993a8c486c753142cc0a78f52ecd733a3208a065fc8d71c0abf0b
+size 1064
diff --git a/checkpoint-664/trainer_state.json b/checkpoint-664/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..44f5ce7cfd67089e8be2980fabdc08876489d16f
--- /dev/null
+++ b/checkpoint-664/trainer_state.json
@@ -0,0 +1,40 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 664,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 359011046129664.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-664/training_args.bin b/checkpoint-664/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-664/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-6640/config.json b/checkpoint-6640/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-6640/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-6640/generation_config.json b/checkpoint-6640/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-6640/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-6640/model.safetensors b/checkpoint-6640/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae2b656c100f3ba628b0613e37b5bd26f44105f3
--- /dev/null
+++ b/checkpoint-6640/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92259b98bc9b79af81d13747e953252c6a06be36482dfd40b06c76b6e040185b
+size 242041896
diff --git a/checkpoint-6640/optimizer.pt b/checkpoint-6640/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d568c779d71952512ecca8dcebd2ca4d12711303
--- /dev/null
+++ b/checkpoint-6640/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cabf1a2e7283a2b1d2b32a4d1456a0064eb8205847773c9ea6a061369c96000
+size 484163514
diff --git a/checkpoint-6640/rng_state.pth b/checkpoint-6640/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6c5a252837d45edbb41ea64e1cf8ecaa5af625ad
--- /dev/null
+++ b/checkpoint-6640/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:120f67d248858b7906f6721742710d32974b84e1a9b4111d6feda98d03fd33ab
+size 14244
diff --git a/checkpoint-6640/scheduler.pt b/checkpoint-6640/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d679820ab769ca1efedbcf357d2f84b37b840c4f
--- /dev/null
+++ b/checkpoint-6640/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:555dff0a6f6c5d04e4a4657553dd850a6f3cad38c29a9822aa9df4a6bff3beea
+size 1064
diff --git a/checkpoint-6640/trainer_state.json b/checkpoint-6640/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6e42fe7e76d11f76f73a315b743a895e36ba5294
--- /dev/null
+++ b/checkpoint-6640/trainer_state.json
@@ -0,0 +1,124 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 6640,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3590110461296640.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-6640/training_args.bin b/checkpoint-6640/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-6640/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-7304/config.json b/checkpoint-7304/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-7304/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-7304/generation_config.json b/checkpoint-7304/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-7304/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-7304/model.safetensors b/checkpoint-7304/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..16354591c3a98344dea586aa72bf65ba6dfd3cac
--- /dev/null
+++ b/checkpoint-7304/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97c41cfd1192c3344cd4cec5cebc61b1d8e5be2de2ed9729d86cbdac3f4cd073
+size 242041896
diff --git a/checkpoint-7304/optimizer.pt b/checkpoint-7304/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..997add471125d148918d0df318ab7bf478264c4f
--- /dev/null
+++ b/checkpoint-7304/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a7d52b02948c75110045598b29ee91cdefd925c73102050776caa706dec1454
+size 484163514
diff --git a/checkpoint-7304/rng_state.pth b/checkpoint-7304/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3e05fae5846a4b2cea726a3eb442245381058ead
--- /dev/null
+++ b/checkpoint-7304/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fab093e333ad3fd097582a8ce805c2b1762364df5f23e2da0e43c6f6ff6d0ae
+size 14244
diff --git a/checkpoint-7304/scheduler.pt b/checkpoint-7304/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c721ec4b35892a10ebe66746a13d3eb25661ebfa
--- /dev/null
+++ b/checkpoint-7304/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf23df0d8cedd91a793d43623f7046ed4e399742ab23cf64b70e5f4e12b10297
+size 1064
diff --git a/checkpoint-7304/trainer_state.json b/checkpoint-7304/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d0231fa5792e59274c9425b183d55e59ae87dd3
--- /dev/null
+++ b/checkpoint-7304/trainer_state.json
@@ -0,0 +1,131 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 11.0,
+  "eval_steps": 500,
+  "global_step": 7304,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3949121507426304.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-7304/training_args.bin b/checkpoint-7304/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-7304/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-7968/config.json b/checkpoint-7968/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-7968/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-7968/generation_config.json b/checkpoint-7968/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-7968/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-7968/model.safetensors b/checkpoint-7968/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..382e80579ae0705d8d27ef8ed0eb792bb0d3a28d
--- /dev/null
+++ b/checkpoint-7968/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0fbb2ed6307aa958e65b192a0cdafa1bc9b8dfa0b79efb83044c0eeaba67d8f
+size 242041896
diff --git a/checkpoint-7968/optimizer.pt b/checkpoint-7968/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7204a6272329349b6b7a57b6bee598b3ed5a24ac
--- /dev/null
+++ b/checkpoint-7968/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ce84f9c60232c41edd5348b819a63874ff3df25cc00ba0d5b8325264ab3f56d
+size 484163514
diff --git a/checkpoint-7968/rng_state.pth b/checkpoint-7968/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5e919fd89a9980d80dd5381e8a89ea1e2196736a
--- /dev/null
+++ b/checkpoint-7968/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a28fd3207e4b154c13ff75e506377848dc740f461fb5bb9bd86078faa06f31c
+size 14244
diff --git a/checkpoint-7968/scheduler.pt b/checkpoint-7968/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4267e4c9be4bfcc1f773f39df7989bdc04e9970c
--- /dev/null
+++ b/checkpoint-7968/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a63f49a11ca4824d69d15383245b8a9d6993c3692826ceaa22f1653db67e142
+size 1064
diff --git a/checkpoint-7968/trainer_state.json b/checkpoint-7968/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f826720213039a6bd884a0b0fb3e1a93c4674cfc
--- /dev/null
+++ b/checkpoint-7968/trainer_state.json
@@ -0,0 +1,138 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 12.0,
+  "eval_steps": 500,
+  "global_step": 7968,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4308132553555968.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-7968/training_args.bin b/checkpoint-7968/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-7968/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-8632/config.json b/checkpoint-8632/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-8632/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-8632/generation_config.json b/checkpoint-8632/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-8632/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-8632/model.safetensors b/checkpoint-8632/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..32ca7bd19f98e480c93918c44926e0ca178d43e4
--- /dev/null
+++ b/checkpoint-8632/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:423127f85334723d8f0b13943974887b1f9dc65a4255ec5a752226cb7eacfe97
+size 242041896
diff --git a/checkpoint-8632/optimizer.pt b/checkpoint-8632/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aad22bdf0c2d49d7540f00d158e804ae0648570e
--- /dev/null
+++ b/checkpoint-8632/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ade0ef0cd6af2cd90a44219869104ecf9f0b1dd2ea3a38f475914239f1e6749
+size 484163514
diff --git a/checkpoint-8632/rng_state.pth b/checkpoint-8632/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33ffb7b9f445798c1b0b4982bb572f18920dfa4a
--- /dev/null
+++ b/checkpoint-8632/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:facdb018b684b4c3efb1e1ab6d16ad42a9832276c58db1afbda251d4a51b2a08
+size 14244
diff --git a/checkpoint-8632/scheduler.pt b/checkpoint-8632/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09f4279b8b4f8dc5df6fbfd9d557298ef467125f
--- /dev/null
+++ b/checkpoint-8632/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84024b040e732904477d5a9d2a12639148bccd02a323b6b5cbd1dc78e2402c0f
+size 1064
diff --git a/checkpoint-8632/trainer_state.json b/checkpoint-8632/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a1867eace4bb9097ff4585b2f8fdd5fa6a34dd04
--- /dev/null
+++ b/checkpoint-8632/trainer_state.json
@@ -0,0 +1,152 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 13.0,
+  "eval_steps": 500,
+  "global_step": 8632,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4667143599685632.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-8632/training_args.bin b/checkpoint-8632/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-8632/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-9296/config.json b/checkpoint-9296/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-9296/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-9296/generation_config.json b/checkpoint-9296/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-9296/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-9296/model.safetensors b/checkpoint-9296/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a02f21926b55b84e961949b7f5249ab4a4585e3d
--- /dev/null
+++ b/checkpoint-9296/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9c43c63b6a53b98c89568399b9dba931903a3fcdc0278c25bc10f15eff73987
+size 242041896
diff --git a/checkpoint-9296/optimizer.pt b/checkpoint-9296/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0a1e4a1968cb1d9f4343dbc58d99a42a928aae7
--- /dev/null
+++ b/checkpoint-9296/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfc0a8e0aea09ecc4bd36162faff192c14d00107415b38c493dde44285779d73
+size 484163514
diff --git a/checkpoint-9296/rng_state.pth b/checkpoint-9296/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1393ccf749963fdd54ac2d13b62ac588c93e1970
--- /dev/null
+++ b/checkpoint-9296/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81dbd91029efc92c25690930a99c8652810dc0b232afd868efb417494a80eb80
+size 14244
diff --git a/checkpoint-9296/scheduler.pt b/checkpoint-9296/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c19c86df771f93c1639437f1f82f0d87be89b40
--- /dev/null
+++ b/checkpoint-9296/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e39d54119fd051b5fc229728fd2918ca57336e2b5ba6d0c996febf241d7d4c9
+size 1064
diff --git a/checkpoint-9296/trainer_state.json b/checkpoint-9296/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6bc33986c7defe164e4efe77467f61c54ba0fe58
--- /dev/null
+++ b/checkpoint-9296/trainer_state.json
@@ -0,0 +1,159 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 14.0,
+  "eval_steps": 500,
+  "global_step": 9296,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    },
+    {
+      "epoch": 13.55421686746988,
+      "grad_norm": 0.08162941783666611,
+      "learning_rate": 0.00016114457831325303,
+      "loss": 0.0248,
+      "step": 9000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5026154645815296.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-9296/training_args.bin b/checkpoint-9296/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-9296/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304
diff --git a/checkpoint-9960/config.json b/checkpoint-9960/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..875ba5d9d44b85d53592976d4c2b6425bc63516f
--- /dev/null
+++ b/checkpoint-9960/config.json
@@ -0,0 +1,61 @@
+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}
diff --git a/checkpoint-9960/generation_config.json b/checkpoint-9960/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eba25c5db1745fe5324f4f7e8890c19853e21453
--- /dev/null
+++ b/checkpoint-9960/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.49.0"
+}
diff --git a/checkpoint-9960/model.safetensors b/checkpoint-9960/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dc4fb7d49fd5e62f671c411ec70a26f6d2cbaf12
--- /dev/null
+++ b/checkpoint-9960/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1751f7e5e385ad51d87be4a1e8a3700dfd2961f3cdb846dc3c1ac6debd623a4
+size 242041896
diff --git a/checkpoint-9960/optimizer.pt b/checkpoint-9960/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db98c0c4f0ec992af96930f2bf49e56c759fac15
--- /dev/null
+++ b/checkpoint-9960/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e2eae6ac59120332d8809ee801e8bc0bd26ae8920c3ec9db6d1aeca9d0822d4
+size 484163514
diff --git a/checkpoint-9960/rng_state.pth b/checkpoint-9960/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a9b938dfcb34f678a705022a28e7f46bc6fa991
--- /dev/null
+++ b/checkpoint-9960/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d29906570537c4494d8c94edc995d63f294bce306ddf5fd5675ba94aaa973eb5
+size 14244
diff --git a/checkpoint-9960/scheduler.pt b/checkpoint-9960/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5285cac7332751f2cd4d5099af6d72740ecdada0
--- /dev/null
+++ b/checkpoint-9960/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb98c87248ee12946b4022814d53bfb8bb097b76257109323c69d88d57c830ec
+size 1064
diff --git a/checkpoint-9960/trainer_state.json b/checkpoint-9960/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..dc7a97650289fd772cbb6b27fe86b6088a1b5349
--- /dev/null
+++ b/checkpoint-9960/trainer_state.json
@@ -0,0 +1,166 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 15.0,
+  "eval_steps": 500,
+  "global_step": 9960,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7530120481927711,
+      "grad_norm": 0.2647170126438141,
+      "learning_rate": 0.0004811746987951807,
+      "loss": 0.3311,
+      "step": 500
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.22880347073078156,
+      "learning_rate": 0.00046234939759036143,
+      "loss": 0.0907,
+      "step": 1000
+    },
+    {
+      "epoch": 2.2590361445783134,
+      "grad_norm": 0.1677163541316986,
+      "learning_rate": 0.00044352409638554217,
+      "loss": 0.0568,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.12338300049304962,
+      "learning_rate": 0.0004246987951807229,
+      "loss": 0.0451,
+      "step": 2000
+    },
+    {
+      "epoch": 3.765060240963855,
+      "grad_norm": 0.08597979694604874,
+      "learning_rate": 0.0004058734939759036,
+      "loss": 0.0386,
+      "step": 2500
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.0988745242357254,
+      "learning_rate": 0.00038704819277108433,
+      "loss": 0.0352,
+      "step": 3000
+    },
+    {
+      "epoch": 5.271084337349397,
+      "grad_norm": 0.11785969883203506,
+      "learning_rate": 0.00036822289156626507,
+      "loss": 0.0331,
+      "step": 3500
+    },
+    {
+      "epoch": 6.024096385542169,
+      "grad_norm": 0.09906379133462906,
+      "learning_rate": 0.0003493975903614458,
+      "loss": 0.0315,
+      "step": 4000
+    },
+    {
+      "epoch": 6.77710843373494,
+      "grad_norm": 0.1129639744758606,
+      "learning_rate": 0.0003305722891566265,
+      "loss": 0.0301,
+      "step": 4500
+    },
+    {
+      "epoch": 7.530120481927711,
+      "grad_norm": 0.07321502268314362,
+      "learning_rate": 0.00031174698795180723,
+      "loss": 0.0292,
+      "step": 5000
+    },
+    {
+      "epoch": 8.283132530120483,
+      "grad_norm": 0.05083702132105827,
+      "learning_rate": 0.0002929216867469879,
+      "loss": 0.028,
+      "step": 5500
+    },
+    {
+      "epoch": 9.036144578313253,
+      "grad_norm": 0.073179692029953,
+      "learning_rate": 0.0002740963855421687,
+      "loss": 0.0275,
+      "step": 6000
+    },
+    {
+      "epoch": 9.789156626506024,
+      "grad_norm": 0.060432616621255875,
+      "learning_rate": 0.0002552710843373494,
+      "loss": 0.0266,
+      "step": 6500
+    },
+    {
+      "epoch": 10.542168674698795,
+      "grad_norm": 0.05641400068998337,
+      "learning_rate": 0.00023644578313253013,
+      "loss": 0.0265,
+      "step": 7000
+    },
+    {
+      "epoch": 11.295180722891565,
+      "grad_norm": 0.055228352546691895,
+      "learning_rate": 0.00021762048192771087,
+      "loss": 0.0257,
+      "step": 7500
+    },
+    {
+      "epoch": 12.048192771084338,
+      "grad_norm": 0.055986884981393814,
+      "learning_rate": 0.00019879518072289158,
+      "loss": 0.0254,
+      "step": 8000
+    },
+    {
+      "epoch": 12.801204819277109,
+      "grad_norm": 0.06879087537527084,
+      "learning_rate": 0.0001799698795180723,
+      "loss": 0.025,
+      "step": 8500
+    },
+    {
+      "epoch": 13.55421686746988,
+      "grad_norm": 0.08162941783666611,
+      "learning_rate": 0.00016114457831325303,
+      "loss": 0.0248,
+      "step": 9000
+    },
+    {
+      "epoch": 14.30722891566265,
+      "grad_norm": 0.0502689927816391,
+      "learning_rate": 0.00014231927710843374,
+      "loss": 0.0242,
+      "step": 9500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5385165691944960.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-9960/training_args.bin b/checkpoint-9960/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4458cc722a24e1abde581b8667a40b1674f5aea
--- /dev/null
+++ b/checkpoint-9960/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2ab2506605f8b233e63780e2c7aa14dc16da409c9e0e5c14641534e5ff016
+size 5304