diff --git a/.gitattributes b/.gitattributes
index 80b5b77a5938402840a46182085e886f245a8d13..5c664143ac0587be17801213b4ed79d6cd48bc7e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -38,3 +38,13 @@ xlm/hierarchical_xlm_roberta/checkpoint-2688/tokenizer.json filter=lfs diff=lfs
xlm/hierarchical_xlm_roberta/checkpoint-3584/tokenizer.json filter=lfs diff=lfs merge=lfs -text
xlm/hierarchical_xlm_roberta/checkpoint-4480/tokenizer.json filter=lfs diff=lfs merge=lfs -text
xlm/hierarchical_xlm_roberta/checkpoint-896/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-1116/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-1395/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-1674/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-1953/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-2232/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-2511/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-279/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-2790/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-558/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+joint_model/checkpoint-837/tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/joint_model/checkpoint-1116/config.json b/joint_model/checkpoint-1116/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-1116/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-1116/model.safetensors b/joint_model/checkpoint-1116/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b36df5a8cedb22370fa9f5bcfe329405d7064c47
--- /dev/null
+++ b/joint_model/checkpoint-1116/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6792b2f6f167bf98323526de92da30fc30176242f8e06b2af08a1f76528af6d2
+size 1112408092
diff --git a/joint_model/checkpoint-1116/optimizer.pt b/joint_model/checkpoint-1116/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffd7530abe63d88351e38ccd505a222528fdfc30
--- /dev/null
+++ b/joint_model/checkpoint-1116/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09fab47c4a460fe4a9c21057cf29f5557d19a2d723b628bc0898a267dadaa022
+size 2224937355
diff --git a/joint_model/checkpoint-1116/rng_state_0.pth b/joint_model/checkpoint-1116/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..370b1ba2fe6af728e764dc2c23d043a82ffcdd65
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c2ca6cda30f0e67f92af2785341362f9fd75975cfa0e1e3edf170b31356982d
+size 16389
diff --git a/joint_model/checkpoint-1116/rng_state_1.pth b/joint_model/checkpoint-1116/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..970ec8c78c6dd49742acc81896fd66a80e1a1fc4
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e19c421d330efe795fe888269f2979bdecc155b8ec1bae695ab646f830eda58
+size 16389
diff --git a/joint_model/checkpoint-1116/rng_state_2.pth b/joint_model/checkpoint-1116/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..44fc11d15282c307b7963a6b6f491561455773dd
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4bfe1bdb6ffcc11de3dfa36f4f902a025948dd1a9d55f9e2a0c37a0c71d8993
+size 16389
diff --git a/joint_model/checkpoint-1116/rng_state_3.pth b/joint_model/checkpoint-1116/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c6adc4374ed209813627ead664506f858bedddcb
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7db4f6f2db3a4f2891f0c6123c6790aa8182a24731540060e1c5162bdfcf332
+size 16389
diff --git a/joint_model/checkpoint-1116/rng_state_4.pth b/joint_model/checkpoint-1116/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6846ade94d473376d4b16cc63ab21c879a5d4359
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e325614869957231c07b72ed17ee0c0dec786269ff93abf17b16883c45761f74
+size 16389
diff --git a/joint_model/checkpoint-1116/rng_state_5.pth b/joint_model/checkpoint-1116/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..65d95e4b0202accfe850a9d9dd094963e3a7e18a
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f06f2380c4f07e128133b104b687744df3b4064ecc4b332886d237fcca20b844
+size 16389
diff --git a/joint_model/checkpoint-1116/rng_state_6.pth b/joint_model/checkpoint-1116/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..39fa906a21713ad6721331651b3ce2d9bbcea060
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6806143edb28cfaf20fbcd82eda60303d3570b7414ff8996f022cf2b33990496
+size 16389
diff --git a/joint_model/checkpoint-1116/rng_state_7.pth b/joint_model/checkpoint-1116/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e1713a674f32dd13384652ba674cd527315f4dcb
--- /dev/null
+++ b/joint_model/checkpoint-1116/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd5cdf368ea3798bd2e333443bb2d4214053bef95ce8af4541fba9630a6adda4
+size 16389
diff --git a/joint_model/checkpoint-1116/scheduler.pt b/joint_model/checkpoint-1116/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab27d781542a9529f435bad1d10c56162329301f
--- /dev/null
+++ b/joint_model/checkpoint-1116/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d771b00dd64d2f869efc678cde233c82e3d5d80f32fc0afc37d60c16f5276a7
+size 1465
diff --git a/joint_model/checkpoint-1116/sentencepiece.bpe.model b/joint_model/checkpoint-1116/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-1116/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-1116/special_tokens_map.json b/joint_model/checkpoint-1116/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-1116/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1116/tokenizer.json b/joint_model/checkpoint-1116/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-1116/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-1116/tokenizer_config.json b/joint_model/checkpoint-1116/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-1116/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1116/trainer_state.json b/joint_model/checkpoint-1116/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..76b544ae3abaf6b1a2481ed8bf5f6784de9904f9
--- /dev/null
+++ b/joint_model/checkpoint-1116/trainer_state.json
@@ -0,0 +1,96 @@
+{
+ "best_global_step": 1116,
+ "best_metric": 0.3113965690135956,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1116",
+ "epoch": 4.0,
+ "eval_steps": 500,
+ "global_step": 1116,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ },
+ {
+ "epoch": 3.5842293906810037,
+ "grad_norm": 9.112466812133789,
+ "learning_rate": 1.2838709677419356e-05,
+ "loss": 0.5009,
+ "step": 1000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy_level1": 0.976,
+ "eval_accuracy_level2": 0.948,
+ "eval_f1_level1": 0.9759892345948809,
+ "eval_f1_level2": 0.944852130743217,
+ "eval_loss": 0.3113965690135956,
+ "eval_runtime": 0.7752,
+ "eval_samples_per_second": 6449.701,
+ "eval_steps_per_second": 12.899,
+ "step": 1116
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 2.9380585063448576e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-1116/training_args.bin b/joint_model/checkpoint-1116/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-1116/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-1395/config.json b/joint_model/checkpoint-1395/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-1395/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-1395/model.safetensors b/joint_model/checkpoint-1395/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5c2046e7c56b89501b3dbfd8dbfb49079f719a21
--- /dev/null
+++ b/joint_model/checkpoint-1395/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c41e0216d9d3862040536a7fd9ba6b254a014777b0d7f1662f492db7a971f406
+size 1112408092
diff --git a/joint_model/checkpoint-1395/optimizer.pt b/joint_model/checkpoint-1395/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4963aef43b876bb8209e81ffbe6c1155904c2d54
--- /dev/null
+++ b/joint_model/checkpoint-1395/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5607b6daf0f0a370ccef9270477c2e9a830246c8af0dd0c7fe8549c739d2e9ac
+size 2224937355
diff --git a/joint_model/checkpoint-1395/rng_state_0.pth b/joint_model/checkpoint-1395/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..da320c884b910e7ee987fb46b56843fd158772ba
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:372eabe9ee1b698bd155ed64252bb52ecd85f363df08d4a6c4512f0e000cb9b7
+size 16389
diff --git a/joint_model/checkpoint-1395/rng_state_1.pth b/joint_model/checkpoint-1395/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..35e2079094ce2616873e376941a9ecb592824a49
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d00e489391354b35849c16d9fa756e7012f5711d57ab1683f71c55ef187b9dd2
+size 16389
diff --git a/joint_model/checkpoint-1395/rng_state_2.pth b/joint_model/checkpoint-1395/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..84bdd9bbd5e30bd20d61107e4eecc9016a4b9b85
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a0cf5772222d7ef83bf5bfa008bd16ddb5100e0b47d95a97be765c501e4e3cc
+size 16389
diff --git a/joint_model/checkpoint-1395/rng_state_3.pth b/joint_model/checkpoint-1395/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3775bf1aa145c7c637b56d33e846cbb00310434d
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:622b3cd66dcaf83e587a5e0c31a6797932cb588d1367575466e0b8eac0b6b732
+size 16389
diff --git a/joint_model/checkpoint-1395/rng_state_4.pth b/joint_model/checkpoint-1395/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..34c25950706f8efe2fd0069421c3fe56519b1ea0
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dfb97eabb2e40d9e6d49009ad4b964b2c73fa42fbf32c8b1f59ae56bf87d92e
+size 16389
diff --git a/joint_model/checkpoint-1395/rng_state_5.pth b/joint_model/checkpoint-1395/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..739bd65f6c2f4a85c04b39f8b84d7d1b3ade7c42
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c2e0af24431f9c87afbbee452b8c4cb68e55978cc475aca99862285217c6f8a
+size 16389
diff --git a/joint_model/checkpoint-1395/rng_state_6.pth b/joint_model/checkpoint-1395/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..da8d5195bf50293f618fbd821a282240b2ea47b3
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c209482d19626584d72a324c23675cfbc298544cbd62b33073d59f67aa1d16e
+size 16389
diff --git a/joint_model/checkpoint-1395/rng_state_7.pth b/joint_model/checkpoint-1395/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3642f908d293393704d51779169855cb1607d60b
--- /dev/null
+++ b/joint_model/checkpoint-1395/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1547a4b17eaa25162d8c2232659379d5dc26ac63c151280e984e29b60718d591
+size 16389
diff --git a/joint_model/checkpoint-1395/scheduler.pt b/joint_model/checkpoint-1395/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db3b1d5513e6f0855e1efda8871b357b2bbcda27
--- /dev/null
+++ b/joint_model/checkpoint-1395/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f9f1682436aaf9d161d72dc7e582154d9fbe0f817b46fd9cdc83dc2d8f4ef8f
+size 1465
diff --git a/joint_model/checkpoint-1395/sentencepiece.bpe.model b/joint_model/checkpoint-1395/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-1395/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-1395/special_tokens_map.json b/joint_model/checkpoint-1395/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-1395/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1395/tokenizer.json b/joint_model/checkpoint-1395/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-1395/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-1395/tokenizer_config.json b/joint_model/checkpoint-1395/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-1395/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1395/trainer_state.json b/joint_model/checkpoint-1395/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f28e63598c58f3bc4405df20e259f7eb016acf2
--- /dev/null
+++ b/joint_model/checkpoint-1395/trainer_state.json
@@ -0,0 +1,108 @@
+{
+ "best_global_step": 1395,
+ "best_metric": 0.2682338356971741,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1395",
+ "epoch": 5.0,
+ "eval_steps": 500,
+ "global_step": 1395,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ },
+ {
+ "epoch": 3.5842293906810037,
+ "grad_norm": 9.112466812133789,
+ "learning_rate": 1.2838709677419356e-05,
+ "loss": 0.5009,
+ "step": 1000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy_level1": 0.976,
+ "eval_accuracy_level2": 0.948,
+ "eval_f1_level1": 0.9759892345948809,
+ "eval_f1_level2": 0.944852130743217,
+ "eval_loss": 0.3113965690135956,
+ "eval_runtime": 0.7752,
+ "eval_samples_per_second": 6449.701,
+ "eval_steps_per_second": 12.899,
+ "step": 1116
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy_level1": 0.9786,
+ "eval_accuracy_level2": 0.9574,
+ "eval_f1_level1": 0.9786086703124734,
+ "eval_f1_level2": 0.9558414971682437,
+ "eval_loss": 0.2682338356971741,
+ "eval_runtime": 0.7265,
+ "eval_samples_per_second": 6882.558,
+ "eval_steps_per_second": 13.765,
+ "step": 1395
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.672573129575629e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-1395/training_args.bin b/joint_model/checkpoint-1395/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-1395/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-1674/config.json b/joint_model/checkpoint-1674/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-1674/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-1674/model.safetensors b/joint_model/checkpoint-1674/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a997964889512431292718f1ff67f0006b63ddb5
--- /dev/null
+++ b/joint_model/checkpoint-1674/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07732aadf5ff31e4f11ced15e3b3f675dcec364a1c0b188e03180a6719bbc0bc
+size 1112408092
diff --git a/joint_model/checkpoint-1674/optimizer.pt b/joint_model/checkpoint-1674/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c09f59af2e43004188e0734d98115af85306c60f
--- /dev/null
+++ b/joint_model/checkpoint-1674/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c772fa9b4c710c806b65a2db5c8842dd78cbf61f45a0b88ae50f5bedc42508d0
+size 2224937355
diff --git a/joint_model/checkpoint-1674/rng_state_0.pth b/joint_model/checkpoint-1674/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5c5b089603c6bed48b4f1765cdc060e30df0d96e
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:244fd61bffb73015ca283aaf0b690244b4bf656c6c488f789153206cc5ca419c
+size 16389
diff --git a/joint_model/checkpoint-1674/rng_state_1.pth b/joint_model/checkpoint-1674/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..35b663e5109c96433353341cbcebbb9cfa733d71
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a85bf43502cd59c3e7002fdd28d84f2755d1d50b9f8395ece5cce57a33e6a2de
+size 16389
diff --git a/joint_model/checkpoint-1674/rng_state_2.pth b/joint_model/checkpoint-1674/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f3e31ee1ff633db8b6bb880049319c3207f67964
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f36f28619c5d75933e40cf2ed1e4be028945b0f4185cc9e11476f543d2e8d7c
+size 16389
diff --git a/joint_model/checkpoint-1674/rng_state_3.pth b/joint_model/checkpoint-1674/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2fe3ac6db09cda87bd31d2e4ec0716a985746e95
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b838f4cf0f902e11d576f4b832468a92c12d05620e73c537a7302c86e09c2752
+size 16389
diff --git a/joint_model/checkpoint-1674/rng_state_4.pth b/joint_model/checkpoint-1674/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..383ae6b5b82ac4453d4ca1b20354b92421e9c690
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c946fcb7164ab82957cc2fd2229fec1ea962b6eb3608757ad9302a5956a5782
+size 16389
diff --git a/joint_model/checkpoint-1674/rng_state_5.pth b/joint_model/checkpoint-1674/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ff350b2c79149fd18f8cd8b2c243385d2382b2d5
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1498a3eb9fd6a568db26513c62c91ef104aa0da2637df120f18cbd8604a6fefc
+size 16389
diff --git a/joint_model/checkpoint-1674/rng_state_6.pth b/joint_model/checkpoint-1674/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5b9dd031fec5e31d58a482247d8a2a3305f962dd
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f449c8299ada96be66c0e08d0603887c01bcef56d3f3c2bf63a0fc8a43664aa
+size 16389
diff --git a/joint_model/checkpoint-1674/rng_state_7.pth b/joint_model/checkpoint-1674/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6dbef8d8c724d316bf25504254d02169bb817a41
--- /dev/null
+++ b/joint_model/checkpoint-1674/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a2a661a72b9f31cca93340f54a4c320474844987c784955cd6cf6c6f2d2cd65
+size 16389
diff --git a/joint_model/checkpoint-1674/scheduler.pt b/joint_model/checkpoint-1674/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..064005b649997d8373a278bbbb208eb738923aef
--- /dev/null
+++ b/joint_model/checkpoint-1674/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e9460e5ee67dc8bfa729d93fdbc410d0dd92c60badc5c6eac4d11137e2d9011
+size 1465
diff --git a/joint_model/checkpoint-1674/sentencepiece.bpe.model b/joint_model/checkpoint-1674/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-1674/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-1674/special_tokens_map.json b/joint_model/checkpoint-1674/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-1674/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1674/tokenizer.json b/joint_model/checkpoint-1674/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-1674/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-1674/tokenizer_config.json b/joint_model/checkpoint-1674/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-1674/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1674/trainer_state.json b/joint_model/checkpoint-1674/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..576b9fc95ea9680896ce0940eb0185a7c41e6081
--- /dev/null
+++ b/joint_model/checkpoint-1674/trainer_state.json
@@ -0,0 +1,127 @@
+{
+ "best_global_step": 1674,
+ "best_metric": 0.2282806634902954,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1674",
+ "epoch": 6.0,
+ "eval_steps": 500,
+ "global_step": 1674,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ },
+ {
+ "epoch": 3.5842293906810037,
+ "grad_norm": 9.112466812133789,
+ "learning_rate": 1.2838709677419356e-05,
+ "loss": 0.5009,
+ "step": 1000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy_level1": 0.976,
+ "eval_accuracy_level2": 0.948,
+ "eval_f1_level1": 0.9759892345948809,
+ "eval_f1_level2": 0.944852130743217,
+ "eval_loss": 0.3113965690135956,
+ "eval_runtime": 0.7752,
+ "eval_samples_per_second": 6449.701,
+ "eval_steps_per_second": 12.899,
+ "step": 1116
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy_level1": 0.9786,
+ "eval_accuracy_level2": 0.9574,
+ "eval_f1_level1": 0.9786086703124734,
+ "eval_f1_level2": 0.9558414971682437,
+ "eval_loss": 0.2682338356971741,
+ "eval_runtime": 0.7265,
+ "eval_samples_per_second": 6882.558,
+ "eval_steps_per_second": 13.765,
+ "step": 1395
+ },
+ {
+ "epoch": 5.376344086021505,
+ "grad_norm": 5.6989006996154785,
+ "learning_rate": 9.254480286738352e-06,
+ "loss": 0.2818,
+ "step": 1500
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy_level1": 0.9804,
+ "eval_accuracy_level2": 0.9642,
+ "eval_f1_level1": 0.9803982577567285,
+ "eval_f1_level2": 0.9638909785156317,
+ "eval_loss": 0.2282806634902954,
+ "eval_runtime": 0.5242,
+ "eval_samples_per_second": 9538.102,
+ "eval_steps_per_second": 19.076,
+ "step": 1674
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 4.407087779649946e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-1674/training_args.bin b/joint_model/checkpoint-1674/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-1674/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-1953/config.json b/joint_model/checkpoint-1953/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-1953/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-1953/model.safetensors b/joint_model/checkpoint-1953/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a5485e04bd7b63127d59a7bd6f54179feb8def90
--- /dev/null
+++ b/joint_model/checkpoint-1953/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad4a7cf70e775f9af2c6e7bfe2b1d85e2ac707cca504fbcf90f2b8eab888da6c
+size 1112408092
diff --git a/joint_model/checkpoint-1953/optimizer.pt b/joint_model/checkpoint-1953/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22e3d7c80a42f9b7918c97eba412c33d7ef4ecb6
--- /dev/null
+++ b/joint_model/checkpoint-1953/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1f5db5ddff0f46638f8ffcd7a3348a901dc2e59319f2dad9e72c0b43e1cfb14
+size 2224937355
diff --git a/joint_model/checkpoint-1953/rng_state_0.pth b/joint_model/checkpoint-1953/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..097ae6c2646458060a404e3bea1b06745616853e
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d7419b306e44cd29b99927850e0762e6cedc8e78103e4ee470068182e831ec9
+size 16389
diff --git a/joint_model/checkpoint-1953/rng_state_1.pth b/joint_model/checkpoint-1953/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a10c34b9a07acca7ab91be6a6664d7be31c99f5b
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee7c126631ead2a377084a6afb17c9a6517adce641f074de6a2da6615bc1cec4
+size 16389
diff --git a/joint_model/checkpoint-1953/rng_state_2.pth b/joint_model/checkpoint-1953/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7be8101cdb047773d47ff0dc0ca1a7347b3488bd
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50796af5be06a84e49c3af4c44824688014982d1b23fc31209ffbdadef38568c
+size 16389
diff --git a/joint_model/checkpoint-1953/rng_state_3.pth b/joint_model/checkpoint-1953/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..822e20adcb64ffe68de49612c395f5ce6935b200
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50cda81dbdc54a37b3833a921f18435947d7bd013b96c41cea0c164113211deb
+size 16389
diff --git a/joint_model/checkpoint-1953/rng_state_4.pth b/joint_model/checkpoint-1953/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..85da0c7a134eed822359cba8c14ef402ce730701
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5149428d8ee6900fe8130620cadd9eb6d9d978b5b4186595866121d9525f3379
+size 16389
diff --git a/joint_model/checkpoint-1953/rng_state_5.pth b/joint_model/checkpoint-1953/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..15ec9a3284b8ac4784590de13799f4218eb4abf3
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:facb048b78a826056bc8bc47acf1a11f9ebcd8c625072a65a9bee9aee2cbbcbf
+size 16389
diff --git a/joint_model/checkpoint-1953/rng_state_6.pth b/joint_model/checkpoint-1953/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f06cbfd938841ffc0ab3cb2e39f967b2b2732f86
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e434b51619e210224fcd0175caee94b66a555a4e3dc46a4c661efe095b7c6aea
+size 16389
diff --git a/joint_model/checkpoint-1953/rng_state_7.pth b/joint_model/checkpoint-1953/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bb30ddffa9cd236687f556fa691307d1a94f291d
--- /dev/null
+++ b/joint_model/checkpoint-1953/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc01d772e9ab257c6d19c7045e95189649fd957d32d97c5e18ce514bfb85c56b
+size 16389
diff --git a/joint_model/checkpoint-1953/scheduler.pt b/joint_model/checkpoint-1953/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f53b16cfb4ea8185895ba465248031f8a970d08
--- /dev/null
+++ b/joint_model/checkpoint-1953/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1510e7a38b890586532c1cb5fa88825d9a7ec78a98c988a18d6545b1b548c37
+size 1465
diff --git a/joint_model/checkpoint-1953/sentencepiece.bpe.model b/joint_model/checkpoint-1953/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-1953/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-1953/special_tokens_map.json b/joint_model/checkpoint-1953/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-1953/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1953/tokenizer.json b/joint_model/checkpoint-1953/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-1953/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-1953/tokenizer_config.json b/joint_model/checkpoint-1953/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-1953/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-1953/trainer_state.json b/joint_model/checkpoint-1953/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..74468f58bb509f93a6f7247345e9bf1f95cc81fa
--- /dev/null
+++ b/joint_model/checkpoint-1953/trainer_state.json
@@ -0,0 +1,139 @@
+{
+ "best_global_step": 1953,
+ "best_metric": 0.2116653025150299,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1953",
+ "epoch": 7.0,
+ "eval_steps": 500,
+ "global_step": 1953,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ },
+ {
+ "epoch": 3.5842293906810037,
+ "grad_norm": 9.112466812133789,
+ "learning_rate": 1.2838709677419356e-05,
+ "loss": 0.5009,
+ "step": 1000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy_level1": 0.976,
+ "eval_accuracy_level2": 0.948,
+ "eval_f1_level1": 0.9759892345948809,
+ "eval_f1_level2": 0.944852130743217,
+ "eval_loss": 0.3113965690135956,
+ "eval_runtime": 0.7752,
+ "eval_samples_per_second": 6449.701,
+ "eval_steps_per_second": 12.899,
+ "step": 1116
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy_level1": 0.9786,
+ "eval_accuracy_level2": 0.9574,
+ "eval_f1_level1": 0.9786086703124734,
+ "eval_f1_level2": 0.9558414971682437,
+ "eval_loss": 0.2682338356971741,
+ "eval_runtime": 0.7265,
+ "eval_samples_per_second": 6882.558,
+ "eval_steps_per_second": 13.765,
+ "step": 1395
+ },
+ {
+ "epoch": 5.376344086021505,
+ "grad_norm": 5.6989006996154785,
+ "learning_rate": 9.254480286738352e-06,
+ "loss": 0.2818,
+ "step": 1500
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy_level1": 0.9804,
+ "eval_accuracy_level2": 0.9642,
+ "eval_f1_level1": 0.9803982577567285,
+ "eval_f1_level2": 0.9638909785156317,
+ "eval_loss": 0.2282806634902954,
+ "eval_runtime": 0.5242,
+ "eval_samples_per_second": 9538.102,
+ "eval_steps_per_second": 19.076,
+ "step": 1674
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy_level1": 0.9824,
+ "eval_accuracy_level2": 0.9664,
+ "eval_f1_level1": 0.9823983306464108,
+ "eval_f1_level2": 0.966154693269809,
+ "eval_loss": 0.2116653025150299,
+ "eval_runtime": 0.4889,
+ "eval_samples_per_second": 10226.528,
+ "eval_steps_per_second": 20.453,
+ "step": 1953
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 5.141602402880717e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-1953/training_args.bin b/joint_model/checkpoint-1953/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-1953/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-2232/config.json b/joint_model/checkpoint-2232/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-2232/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-2232/model.safetensors b/joint_model/checkpoint-2232/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b29bd8b2544202a153a4b162d7148bbb5b40d60
--- /dev/null
+++ b/joint_model/checkpoint-2232/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f2e64493da2c4a5265ba172fbc217c41a49ae42cdf8910d07ea3db2c4f3221b
+size 1112408092
diff --git a/joint_model/checkpoint-2232/optimizer.pt b/joint_model/checkpoint-2232/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..24f0b08945bbc24ca418a1b30e57554970e44568
--- /dev/null
+++ b/joint_model/checkpoint-2232/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f63178dd3e9619750df7cb2c82453b9a1cd61e41545953b97c7e1c3f86a4c4cc
+size 2224937355
diff --git a/joint_model/checkpoint-2232/rng_state_0.pth b/joint_model/checkpoint-2232/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..aaf1830e296a77d397e07fd1bc06f5bbb4cc1f29
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eab41ef9c0e4a88cfd3ffc22fae20763c1be99c9635ddf59900314b6cf68124f
+size 16389
diff --git a/joint_model/checkpoint-2232/rng_state_1.pth b/joint_model/checkpoint-2232/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a55141c3022fe6ce66dbb82049f343399de15284
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21d03ccfd914586b01f981cc8eca5be1052848977ebd9804e886146d0d91a735
+size 16389
diff --git a/joint_model/checkpoint-2232/rng_state_2.pth b/joint_model/checkpoint-2232/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f240159ad3aeb7b9854160d91d86e7fae7976a49
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04d4994f524975ea9f9c147a27b7717c4cba700985878d76a0aa96bc52790e62
+size 16389
diff --git a/joint_model/checkpoint-2232/rng_state_3.pth b/joint_model/checkpoint-2232/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fc0c4c8843453647fe03f639d755ba7d0cfdc973
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9420abfb9dcc1ae0d2ad1549454098fe9f673c85855e8857b7d340dbe4c1895c
+size 16389
diff --git a/joint_model/checkpoint-2232/rng_state_4.pth b/joint_model/checkpoint-2232/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd23027d663c36a63327eb76e3d9a1ec33bef33d
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e77c2911a1fa1ff01d875bd3b1554286911c193b6e3302c81dcb970e5796879e
+size 16389
diff --git a/joint_model/checkpoint-2232/rng_state_5.pth b/joint_model/checkpoint-2232/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..193400054dd0e7979768a6202b0019542a69ad63
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f93c4d2a90bba85d3c678944bc6e06bb3a24a4766908921a43290f4f9615c7da
+size 16389
diff --git a/joint_model/checkpoint-2232/rng_state_6.pth b/joint_model/checkpoint-2232/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..edc588358a6e5c64701a7740c420bfdedd8f42d8
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82b4e921e87cd188b7e4483a1ebd9a4725ce1a9e4efef978c3c6878739fb28fd
+size 16389
diff --git a/joint_model/checkpoint-2232/rng_state_7.pth b/joint_model/checkpoint-2232/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a44733e2dda0da1b9233d79f4b9649e52b8c8ec9
--- /dev/null
+++ b/joint_model/checkpoint-2232/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7628dc2de271047f4382d05b7cb261ca1bc8dbfa648ad5b59faaf19b3c5e3d15
+size 16389
diff --git a/joint_model/checkpoint-2232/scheduler.pt b/joint_model/checkpoint-2232/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72635c3631850f2f6fe792e5d85960886879ef49
--- /dev/null
+++ b/joint_model/checkpoint-2232/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fedf9863e48d7786659fe61566f5ceff4fd963097de5304c334acc258fc3d82d
+size 1465
diff --git a/joint_model/checkpoint-2232/sentencepiece.bpe.model b/joint_model/checkpoint-2232/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-2232/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-2232/special_tokens_map.json b/joint_model/checkpoint-2232/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-2232/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-2232/tokenizer.json b/joint_model/checkpoint-2232/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-2232/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-2232/tokenizer_config.json b/joint_model/checkpoint-2232/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-2232/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-2232/trainer_state.json b/joint_model/checkpoint-2232/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d6361bdbc1b1b0bf58cc943470bd9f9af93e4bcb
--- /dev/null
+++ b/joint_model/checkpoint-2232/trainer_state.json
@@ -0,0 +1,158 @@
+{
+ "best_global_step": 2232,
+ "best_metric": 0.20548628270626068,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-2232",
+ "epoch": 8.0,
+ "eval_steps": 500,
+ "global_step": 2232,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ },
+ {
+ "epoch": 3.5842293906810037,
+ "grad_norm": 9.112466812133789,
+ "learning_rate": 1.2838709677419356e-05,
+ "loss": 0.5009,
+ "step": 1000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy_level1": 0.976,
+ "eval_accuracy_level2": 0.948,
+ "eval_f1_level1": 0.9759892345948809,
+ "eval_f1_level2": 0.944852130743217,
+ "eval_loss": 0.3113965690135956,
+ "eval_runtime": 0.7752,
+ "eval_samples_per_second": 6449.701,
+ "eval_steps_per_second": 12.899,
+ "step": 1116
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy_level1": 0.9786,
+ "eval_accuracy_level2": 0.9574,
+ "eval_f1_level1": 0.9786086703124734,
+ "eval_f1_level2": 0.9558414971682437,
+ "eval_loss": 0.2682338356971741,
+ "eval_runtime": 0.7265,
+ "eval_samples_per_second": 6882.558,
+ "eval_steps_per_second": 13.765,
+ "step": 1395
+ },
+ {
+ "epoch": 5.376344086021505,
+ "grad_norm": 5.6989006996154785,
+ "learning_rate": 9.254480286738352e-06,
+ "loss": 0.2818,
+ "step": 1500
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy_level1": 0.9804,
+ "eval_accuracy_level2": 0.9642,
+ "eval_f1_level1": 0.9803982577567285,
+ "eval_f1_level2": 0.9638909785156317,
+ "eval_loss": 0.2282806634902954,
+ "eval_runtime": 0.5242,
+ "eval_samples_per_second": 9538.102,
+ "eval_steps_per_second": 19.076,
+ "step": 1674
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy_level1": 0.9824,
+ "eval_accuracy_level2": 0.9664,
+ "eval_f1_level1": 0.9823983306464108,
+ "eval_f1_level2": 0.966154693269809,
+ "eval_loss": 0.2116653025150299,
+ "eval_runtime": 0.4889,
+ "eval_samples_per_second": 10226.528,
+ "eval_steps_per_second": 20.453,
+ "step": 1953
+ },
+ {
+ "epoch": 7.168458781362007,
+ "grad_norm": 7.060131072998047,
+ "learning_rate": 5.670250896057348e-06,
+ "loss": 0.1994,
+ "step": 2000
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy_level1": 0.9826,
+ "eval_accuracy_level2": 0.969,
+ "eval_f1_level1": 0.9826127380767093,
+ "eval_f1_level2": 0.9688130224440087,
+ "eval_loss": 0.20548628270626068,
+ "eval_runtime": 0.5897,
+ "eval_samples_per_second": 8478.651,
+ "eval_steps_per_second": 16.957,
+ "step": 2232
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 5.876117039533261e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-2232/training_args.bin b/joint_model/checkpoint-2232/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-2232/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-2511/config.json b/joint_model/checkpoint-2511/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-2511/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-2511/model.safetensors b/joint_model/checkpoint-2511/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..126da99d20433885933fe3030f0db43d4a08451b
--- /dev/null
+++ b/joint_model/checkpoint-2511/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b58a75dd0fb82a6466f8c24a7e148d1865aeb496457e2e915fbedf90ab56337c
+size 1112408092
diff --git a/joint_model/checkpoint-2511/optimizer.pt b/joint_model/checkpoint-2511/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15a59810d5bd62ab00128869dcc3a0bcc517aae7
--- /dev/null
+++ b/joint_model/checkpoint-2511/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e17e16d6ec1e2dbc9f1335e801182aa45c918f19e1ee4d04f9ea9063cfb0228
+size 2224937355
diff --git a/joint_model/checkpoint-2511/rng_state_0.pth b/joint_model/checkpoint-2511/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b4820f118dae3ac7345b9b4ab4959e9f90b24c24
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cd5cb87ba9a49bfdede4fd7017cfe7aa4a16704948a38db3b45a4435b088d63
+size 16389
diff --git a/joint_model/checkpoint-2511/rng_state_1.pth b/joint_model/checkpoint-2511/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..17ae64b9a005901030ac3f456a231ed7bedf7cd8
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88bceb63645c6a72c5a3a14d7d8f53e4a57975cfa318ce618aa0364fa2fa178e
+size 16389
diff --git a/joint_model/checkpoint-2511/rng_state_2.pth b/joint_model/checkpoint-2511/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33629e7e4d47b1086848f31975a777515dd794fa
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e26d53ef797d2b8afd11fa8018f8a2a18275514a7db4fa0809bd2e64b694ca40
+size 16389
diff --git a/joint_model/checkpoint-2511/rng_state_3.pth b/joint_model/checkpoint-2511/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..28e1f268c2c22ab86b98762ee7c2ec73ceaf25d5
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75d8277c6e380faee00d4c15a1df8d26aeb12735792bf3dc07300e1eebf34e11
+size 16389
diff --git a/joint_model/checkpoint-2511/rng_state_4.pth b/joint_model/checkpoint-2511/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a42bfc01108f4a0222e35259ba90400a46d51e5c
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a66ef23029bb31289cbf241042e70cec294b9e8377e3e81b6d32a4262ec5470
+size 16389
diff --git a/joint_model/checkpoint-2511/rng_state_5.pth b/joint_model/checkpoint-2511/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0eac2ec1cb33dd7ddec47ac06ae363e9cb594cd5
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcd7a02720f74c54a892bd969f94dd8a735232108c002ca73baccd6a22ff4736
+size 16389
diff --git a/joint_model/checkpoint-2511/rng_state_6.pth b/joint_model/checkpoint-2511/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..88f1cc8fe6207405dabcfde23c70eecdccfef402
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2584a385a3872614bcaf9f2ca9398b4492141d0b4a293fecac27f643fb7d258
+size 16389
diff --git a/joint_model/checkpoint-2511/rng_state_7.pth b/joint_model/checkpoint-2511/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..83736f35497a03caff8739f673b0d83e50664511
--- /dev/null
+++ b/joint_model/checkpoint-2511/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da4c640c23994e085f14f83dccc0fca6c13410ca57c2c31b7acb350abf0bcae5
+size 16389
diff --git a/joint_model/checkpoint-2511/scheduler.pt b/joint_model/checkpoint-2511/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c14d72a2b24b39ade872c412fdb3bb03167acbd
--- /dev/null
+++ b/joint_model/checkpoint-2511/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:787980f142eded8d1cd676805f377fbdccd4c80688bd8d0f341162b7115eab43
+size 1465
diff --git a/joint_model/checkpoint-2511/sentencepiece.bpe.model b/joint_model/checkpoint-2511/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-2511/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-2511/special_tokens_map.json b/joint_model/checkpoint-2511/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-2511/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-2511/tokenizer.json b/joint_model/checkpoint-2511/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-2511/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-2511/tokenizer_config.json b/joint_model/checkpoint-2511/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-2511/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-2511/trainer_state.json b/joint_model/checkpoint-2511/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e4393486d07a998f376f0f971d78831587db837c
--- /dev/null
+++ b/joint_model/checkpoint-2511/trainer_state.json
@@ -0,0 +1,177 @@
+{
+ "best_global_step": 2511,
+ "best_metric": 0.20010386407375336,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-2511",
+ "epoch": 9.0,
+ "eval_steps": 500,
+ "global_step": 2511,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ },
+ {
+ "epoch": 3.5842293906810037,
+ "grad_norm": 9.112466812133789,
+ "learning_rate": 1.2838709677419356e-05,
+ "loss": 0.5009,
+ "step": 1000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy_level1": 0.976,
+ "eval_accuracy_level2": 0.948,
+ "eval_f1_level1": 0.9759892345948809,
+ "eval_f1_level2": 0.944852130743217,
+ "eval_loss": 0.3113965690135956,
+ "eval_runtime": 0.7752,
+ "eval_samples_per_second": 6449.701,
+ "eval_steps_per_second": 12.899,
+ "step": 1116
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy_level1": 0.9786,
+ "eval_accuracy_level2": 0.9574,
+ "eval_f1_level1": 0.9786086703124734,
+ "eval_f1_level2": 0.9558414971682437,
+ "eval_loss": 0.2682338356971741,
+ "eval_runtime": 0.7265,
+ "eval_samples_per_second": 6882.558,
+ "eval_steps_per_second": 13.765,
+ "step": 1395
+ },
+ {
+ "epoch": 5.376344086021505,
+ "grad_norm": 5.6989006996154785,
+ "learning_rate": 9.254480286738352e-06,
+ "loss": 0.2818,
+ "step": 1500
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy_level1": 0.9804,
+ "eval_accuracy_level2": 0.9642,
+ "eval_f1_level1": 0.9803982577567285,
+ "eval_f1_level2": 0.9638909785156317,
+ "eval_loss": 0.2282806634902954,
+ "eval_runtime": 0.5242,
+ "eval_samples_per_second": 9538.102,
+ "eval_steps_per_second": 19.076,
+ "step": 1674
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy_level1": 0.9824,
+ "eval_accuracy_level2": 0.9664,
+ "eval_f1_level1": 0.9823983306464108,
+ "eval_f1_level2": 0.966154693269809,
+ "eval_loss": 0.2116653025150299,
+ "eval_runtime": 0.4889,
+ "eval_samples_per_second": 10226.528,
+ "eval_steps_per_second": 20.453,
+ "step": 1953
+ },
+ {
+ "epoch": 7.168458781362007,
+ "grad_norm": 7.060131072998047,
+ "learning_rate": 5.670250896057348e-06,
+ "loss": 0.1994,
+ "step": 2000
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy_level1": 0.9826,
+ "eval_accuracy_level2": 0.969,
+ "eval_f1_level1": 0.9826127380767093,
+ "eval_f1_level2": 0.9688130224440087,
+ "eval_loss": 0.20548628270626068,
+ "eval_runtime": 0.5897,
+ "eval_samples_per_second": 8478.651,
+ "eval_steps_per_second": 16.957,
+ "step": 2232
+ },
+ {
+ "epoch": 8.960573476702509,
+ "grad_norm": 9.435193061828613,
+ "learning_rate": 2.086021505376344e-06,
+ "loss": 0.1544,
+ "step": 2500
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy_level1": 0.9836,
+ "eval_accuracy_level2": 0.9706,
+ "eval_f1_level1": 0.9836193977032266,
+ "eval_f1_level2": 0.9704863338256707,
+ "eval_loss": 0.20010386407375336,
+ "eval_runtime": 0.7263,
+ "eval_samples_per_second": 6884.526,
+ "eval_steps_per_second": 13.769,
+ "step": 2511
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 6.610631666119475e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-2511/training_args.bin b/joint_model/checkpoint-2511/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-2511/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-279/config.json b/joint_model/checkpoint-279/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-279/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-279/model.safetensors b/joint_model/checkpoint-279/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7282703f4af5af18dbb4cfd9592c2e7c43f57848
--- /dev/null
+++ b/joint_model/checkpoint-279/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37e2adab4b3aa29e0440ee6529e7e8e4006d09f3192695d2b51d3cc516e14e4b
+size 1112408092
diff --git a/joint_model/checkpoint-279/optimizer.pt b/joint_model/checkpoint-279/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..031bb3dee1f2fe42c40c67ad0bff5534e13e5495
--- /dev/null
+++ b/joint_model/checkpoint-279/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:445bbf18cdcb44b6eb39417a6938c95fd7daf5ef4c088862f8867f3d5c4190cf
+size 2224937355
diff --git a/joint_model/checkpoint-279/rng_state_0.pth b/joint_model/checkpoint-279/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bf445bc77857d4c2335a113c5cc9afe573264d4e
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f64d1373a611e74f2173b6b99973eaf69537d8506d436ec8e98d7b7edb7a2a9b
+size 16389
diff --git a/joint_model/checkpoint-279/rng_state_1.pth b/joint_model/checkpoint-279/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ef5aa6bd7e461a5c465001960a75fd47ff24f488
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:307e59cec4aa1182c93ae2ab81253ec46d882c9111ee0deece55210ef1b9c09d
+size 16389
diff --git a/joint_model/checkpoint-279/rng_state_2.pth b/joint_model/checkpoint-279/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..12bd35ec04b2466e17f936c746665e5bc30c6584
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c9086f7ab23e0beab55e7b326ea890b64daea93653f3b7fe182982f2142e647
+size 16389
diff --git a/joint_model/checkpoint-279/rng_state_3.pth b/joint_model/checkpoint-279/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..42faf6dca4f850042ddef7d67ed10f705190525b
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3635d4bd8da07c184a0120bba6b1421f449002105c709301bfef0cae3d8d8773
+size 16389
diff --git a/joint_model/checkpoint-279/rng_state_4.pth b/joint_model/checkpoint-279/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..db77c56f37de7634511dc3b2f5552448dad4783b
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:080ca24393ce589e8fbc32ddb1baf7115949b3cda7f562dbed8e243014e45170
+size 16389
diff --git a/joint_model/checkpoint-279/rng_state_5.pth b/joint_model/checkpoint-279/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..62a174fe3f9979f6493bf5057243a070d55991f3
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d05bb7ba0220bf8a5afe1ddf326b9c098a4ef4ba5b95ad6a98af48593e6afe2
+size 16389
diff --git a/joint_model/checkpoint-279/rng_state_6.pth b/joint_model/checkpoint-279/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..46b93989a2d5d616316700b6099ba1cb584536db
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59383333e0ddfb6ba4803fae0ef2a9832b5b186ae5bce3132326b0dbca291894
+size 16389
diff --git a/joint_model/checkpoint-279/rng_state_7.pth b/joint_model/checkpoint-279/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4d2c04235046272a96fdb3a37f3613d68ae86b4a
--- /dev/null
+++ b/joint_model/checkpoint-279/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3254021ab22193bf6b783682ec194469d102dac3e5e70b5426541d6fef1818e9
+size 16389
diff --git a/joint_model/checkpoint-279/scheduler.pt b/joint_model/checkpoint-279/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61cc97489bd3cee12969e640db060314c476efdf
--- /dev/null
+++ b/joint_model/checkpoint-279/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e695e4dcb8ae17c008abc10305f554d626be890e5a78b9ec4e0637ec6fa3f51
+size 1465
diff --git a/joint_model/checkpoint-279/sentencepiece.bpe.model b/joint_model/checkpoint-279/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-279/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-279/special_tokens_map.json b/joint_model/checkpoint-279/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-279/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-279/tokenizer.json b/joint_model/checkpoint-279/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-279/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-279/tokenizer_config.json b/joint_model/checkpoint-279/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-279/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-279/trainer_state.json b/joint_model/checkpoint-279/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f1ad0eb3de119d403f6222165ca65ba449b4eb9b
--- /dev/null
+++ b/joint_model/checkpoint-279/trainer_state.json
@@ -0,0 +1,46 @@
+{
+ "best_global_step": 279,
+ "best_metric": 1.1101479530334473,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-279",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 279,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 7345146232307712.0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-279/training_args.bin b/joint_model/checkpoint-279/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-279/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-2790/config.json b/joint_model/checkpoint-2790/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-2790/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-2790/model.safetensors b/joint_model/checkpoint-2790/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ded8ab3f80c26714783badc9aa73556f9408638f
--- /dev/null
+++ b/joint_model/checkpoint-2790/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6eb1ccd12e907c8ebc1e8e6300463ba2c384ef7a9f8447a13f02724a1751641
+size 1112408092
diff --git a/joint_model/checkpoint-2790/optimizer.pt b/joint_model/checkpoint-2790/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be4228bb8cbfb0055ff384a64b1809c199690316
--- /dev/null
+++ b/joint_model/checkpoint-2790/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7eb93eeaa3a053fd7b9bb5b84923284c3da9dc559d8b48604005d7dba36326e
+size 2224937355
diff --git a/joint_model/checkpoint-2790/rng_state_0.pth b/joint_model/checkpoint-2790/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1204dfcda519e30b1f0934f725519038ec1ffe46
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3694df1592907bff7db49c1f38a514da39a18474d33f5396292f06b8881baf1b
+size 16389
diff --git a/joint_model/checkpoint-2790/rng_state_1.pth b/joint_model/checkpoint-2790/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a04284e4bf2d0f7b5651fec53a0a577d2697bc57
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:032463486955ad1b697dd64126b425c51f84df9903692266c4b4ceee9cdc6e66
+size 16389
diff --git a/joint_model/checkpoint-2790/rng_state_2.pth b/joint_model/checkpoint-2790/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..efa00c6512f2c0b8f45bc807becc365234c6c771
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02fc38a0ae9b600f2035ab0f6ffaed56380fd22b3e8ab03879c99168b6eeec9d
+size 16389
diff --git a/joint_model/checkpoint-2790/rng_state_3.pth b/joint_model/checkpoint-2790/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d8397a60e26b1948b1216b50242887961847bbdd
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08fd45f971330bec68c9f5feef2f4ec9980d96e954f2be27a8fe7e2b0d902bfc
+size 16389
diff --git a/joint_model/checkpoint-2790/rng_state_4.pth b/joint_model/checkpoint-2790/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..93d834d206168b209cab778034ba97d5c59b3ad1
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d869459caa121b5225ac1120ee164121cb7dca72fe321f04cba29f30a74c7a11
+size 16389
diff --git a/joint_model/checkpoint-2790/rng_state_5.pth b/joint_model/checkpoint-2790/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e85533f3144b4263ba9c24f896f396170cc5d832
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e39fe9af551dce334937d68faa12a89caef0ed8ad87618af6cf8791ff41b055
+size 16389
diff --git a/joint_model/checkpoint-2790/rng_state_6.pth b/joint_model/checkpoint-2790/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..175cf3b8132a2fc63f434890fbe337add7e0046a
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c9effc7e010cd6483f53db0c185548d596482ec43106c90d3b51e5f407c292a
+size 16389
diff --git a/joint_model/checkpoint-2790/rng_state_7.pth b/joint_model/checkpoint-2790/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9e804207248e61126d283b205e1b1359d910ee44
--- /dev/null
+++ b/joint_model/checkpoint-2790/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bb317d00046b48dc3d3553fb624dd4664da7ce4e3604b361d470b1f3ed65e9b
+size 16389
diff --git a/joint_model/checkpoint-2790/scheduler.pt b/joint_model/checkpoint-2790/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..044166dc5cd12a1934def5f8cf057ba8f6b56df7
--- /dev/null
+++ b/joint_model/checkpoint-2790/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccef646684698ad189a8d80ef16f815d48dcaa966c52dafed533b1d6a2c27964
+size 1465
diff --git a/joint_model/checkpoint-2790/sentencepiece.bpe.model b/joint_model/checkpoint-2790/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-2790/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-2790/special_tokens_map.json b/joint_model/checkpoint-2790/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-2790/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-2790/tokenizer.json b/joint_model/checkpoint-2790/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-2790/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-2790/tokenizer_config.json b/joint_model/checkpoint-2790/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-2790/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-2790/trainer_state.json b/joint_model/checkpoint-2790/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf8712318298f4048b5872da342f77355711091c
--- /dev/null
+++ b/joint_model/checkpoint-2790/trainer_state.json
@@ -0,0 +1,189 @@
+{
+ "best_global_step": 2790,
+ "best_metric": 0.19661813974380493,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-2790",
+ "epoch": 10.0,
+ "eval_steps": 500,
+ "global_step": 2790,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ },
+ {
+ "epoch": 3.5842293906810037,
+ "grad_norm": 9.112466812133789,
+ "learning_rate": 1.2838709677419356e-05,
+ "loss": 0.5009,
+ "step": 1000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy_level1": 0.976,
+ "eval_accuracy_level2": 0.948,
+ "eval_f1_level1": 0.9759892345948809,
+ "eval_f1_level2": 0.944852130743217,
+ "eval_loss": 0.3113965690135956,
+ "eval_runtime": 0.7752,
+ "eval_samples_per_second": 6449.701,
+ "eval_steps_per_second": 12.899,
+ "step": 1116
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy_level1": 0.9786,
+ "eval_accuracy_level2": 0.9574,
+ "eval_f1_level1": 0.9786086703124734,
+ "eval_f1_level2": 0.9558414971682437,
+ "eval_loss": 0.2682338356971741,
+ "eval_runtime": 0.7265,
+ "eval_samples_per_second": 6882.558,
+ "eval_steps_per_second": 13.765,
+ "step": 1395
+ },
+ {
+ "epoch": 5.376344086021505,
+ "grad_norm": 5.6989006996154785,
+ "learning_rate": 9.254480286738352e-06,
+ "loss": 0.2818,
+ "step": 1500
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy_level1": 0.9804,
+ "eval_accuracy_level2": 0.9642,
+ "eval_f1_level1": 0.9803982577567285,
+ "eval_f1_level2": 0.9638909785156317,
+ "eval_loss": 0.2282806634902954,
+ "eval_runtime": 0.5242,
+ "eval_samples_per_second": 9538.102,
+ "eval_steps_per_second": 19.076,
+ "step": 1674
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy_level1": 0.9824,
+ "eval_accuracy_level2": 0.9664,
+ "eval_f1_level1": 0.9823983306464108,
+ "eval_f1_level2": 0.966154693269809,
+ "eval_loss": 0.2116653025150299,
+ "eval_runtime": 0.4889,
+ "eval_samples_per_second": 10226.528,
+ "eval_steps_per_second": 20.453,
+ "step": 1953
+ },
+ {
+ "epoch": 7.168458781362007,
+ "grad_norm": 7.060131072998047,
+ "learning_rate": 5.670250896057348e-06,
+ "loss": 0.1994,
+ "step": 2000
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy_level1": 0.9826,
+ "eval_accuracy_level2": 0.969,
+ "eval_f1_level1": 0.9826127380767093,
+ "eval_f1_level2": 0.9688130224440087,
+ "eval_loss": 0.20548628270626068,
+ "eval_runtime": 0.5897,
+ "eval_samples_per_second": 8478.651,
+ "eval_steps_per_second": 16.957,
+ "step": 2232
+ },
+ {
+ "epoch": 8.960573476702509,
+ "grad_norm": 9.435193061828613,
+ "learning_rate": 2.086021505376344e-06,
+ "loss": 0.1544,
+ "step": 2500
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy_level1": 0.9836,
+ "eval_accuracy_level2": 0.9706,
+ "eval_f1_level1": 0.9836193977032266,
+ "eval_f1_level2": 0.9704863338256707,
+ "eval_loss": 0.20010386407375336,
+ "eval_runtime": 0.7263,
+ "eval_samples_per_second": 6884.526,
+ "eval_steps_per_second": 13.769,
+ "step": 2511
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy_level1": 0.984,
+ "eval_accuracy_level2": 0.971,
+ "eval_f1_level1": 0.9840142363029732,
+ "eval_f1_level2": 0.970881189240863,
+ "eval_loss": 0.19661813974380493,
+ "eval_runtime": 0.5736,
+ "eval_samples_per_second": 8717.27,
+ "eval_steps_per_second": 17.435,
+ "step": 2790
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 7.345146289350246e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-2790/training_args.bin b/joint_model/checkpoint-2790/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-2790/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-558/config.json b/joint_model/checkpoint-558/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-558/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-558/model.safetensors b/joint_model/checkpoint-558/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8ac3fae878321eb6c2e73acd4af336f6a8ae9f34
--- /dev/null
+++ b/joint_model/checkpoint-558/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d0e78392ee14b5f51cac6627a6b55a68afe53ed1472facda429a0fd922ffe4e
+size 1112408092
diff --git a/joint_model/checkpoint-558/optimizer.pt b/joint_model/checkpoint-558/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b1597e69574355e591c3d4c155fc41c304adcb3
--- /dev/null
+++ b/joint_model/checkpoint-558/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ee05b5923bc1138156211fc9d285635ae0cdea24efd627b57f2e109bea903b4
+size 2224937355
diff --git a/joint_model/checkpoint-558/rng_state_0.pth b/joint_model/checkpoint-558/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..22c59c271aa91ae40a13d484e95c77d6344e7611
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d0b08ac33f223cec939ef2a82779f76df73c1b3070d2185aea1d15f26b285ed
+size 16389
diff --git a/joint_model/checkpoint-558/rng_state_1.pth b/joint_model/checkpoint-558/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1b77031dcb9050662cd5baa3d1c3b3fdfb7fa486
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daf754a07f7208d1b5249210376c29c1d32533c690c3984eadc45be2abd83432
+size 16389
diff --git a/joint_model/checkpoint-558/rng_state_2.pth b/joint_model/checkpoint-558/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..878eaa18d13aea84b790df0a4fb92cee59696d78
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96f47fc93c047de8cd7a54bb1da5290b59ab7545af86022265dc627b49a60554
+size 16389
diff --git a/joint_model/checkpoint-558/rng_state_3.pth b/joint_model/checkpoint-558/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a5ef7f1850b8436e1ee6c604ec9a52b84ed26db2
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5c81c63dde2d7353453ff13fdc65d31a1ed80f49baa476e2717a451ef4fa726
+size 16389
diff --git a/joint_model/checkpoint-558/rng_state_4.pth b/joint_model/checkpoint-558/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..72d9112d5adf4ecceaaf09664e3195cbae361d4c
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26f756dc8c9c847dba6331631513be6924dcad79409766453e3e75280a0595da
+size 16389
diff --git a/joint_model/checkpoint-558/rng_state_5.pth b/joint_model/checkpoint-558/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..18945a9838866eca9d6ae2cc1bd6ca36b617f46c
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eea7a79383e8430e4e030fafe149a75c93dc40e69ef9d62e24c9e322b363a2f3
+size 16389
diff --git a/joint_model/checkpoint-558/rng_state_6.pth b/joint_model/checkpoint-558/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6249af6dae34db55690edbef55d64dd18ed44a25
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c85a6aa79aeb28d3314cb33b6346ef96c4aac933110943815f9121824fb93ef
+size 16389
diff --git a/joint_model/checkpoint-558/rng_state_7.pth b/joint_model/checkpoint-558/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..faf7f8cc91d1f5e285bbe8e38a4f053508028bab
--- /dev/null
+++ b/joint_model/checkpoint-558/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3c878f39a05836b9436f33f1cf9302acb4524421a6f086e59035ec929f79611
+size 16389
diff --git a/joint_model/checkpoint-558/scheduler.pt b/joint_model/checkpoint-558/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d6621ee56f7420b2390c6b22868c81dacca16ae
--- /dev/null
+++ b/joint_model/checkpoint-558/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7af96adfd67e9829e5edf51801a1d6a079e91c6a6adf44d42b4c42ae2206d71d
+size 1465
diff --git a/joint_model/checkpoint-558/sentencepiece.bpe.model b/joint_model/checkpoint-558/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-558/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-558/special_tokens_map.json b/joint_model/checkpoint-558/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-558/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-558/tokenizer.json b/joint_model/checkpoint-558/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-558/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-558/tokenizer_config.json b/joint_model/checkpoint-558/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-558/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-558/trainer_state.json b/joint_model/checkpoint-558/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a70d6e3e14b4b8a1943d01f6921ff9b6bd9fb8b3
--- /dev/null
+++ b/joint_model/checkpoint-558/trainer_state.json
@@ -0,0 +1,65 @@
+{
+ "best_global_step": 558,
+ "best_metric": 0.5648184418678284,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-558",
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 558,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.4690292598833152e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-558/training_args.bin b/joint_model/checkpoint-558/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-558/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/checkpoint-837/config.json b/joint_model/checkpoint-837/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e
--- /dev/null
+++ b/joint_model/checkpoint-837/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "HierarchicalXLMRoberta"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "dtype": "float32",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "transformers_version": "4.56.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+}
diff --git a/joint_model/checkpoint-837/model.safetensors b/joint_model/checkpoint-837/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0378493d049e5ca19242f9865e84cfb412551575
--- /dev/null
+++ b/joint_model/checkpoint-837/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbdc9576192f76fda5ff7073f488a88a3c707f4e196b71004fe8720f111c0c97
+size 1112408092
diff --git a/joint_model/checkpoint-837/optimizer.pt b/joint_model/checkpoint-837/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5819112e53f8f47014887290262d41893bd646b6
--- /dev/null
+++ b/joint_model/checkpoint-837/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a402a284354466004a224d1ff4886b60b44fd578857ae3ac217661f2961e435
+size 2224937355
diff --git a/joint_model/checkpoint-837/rng_state_0.pth b/joint_model/checkpoint-837/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..455f5c62f124af2602e8106dee685fd51e64e289
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8174d893cf29edfa4a6b3ac809dcc6a14286fc178cc7c02ad6cf43d0c9f0627a
+size 16389
diff --git a/joint_model/checkpoint-837/rng_state_1.pth b/joint_model/checkpoint-837/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..578380829a7e0f40e0d17e36ae4cb750867f4c76
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d92dda4b058d54309f7b8865b9b8bb771992c275e064d8d0a2e2878cb6c48f6d
+size 16389
diff --git a/joint_model/checkpoint-837/rng_state_2.pth b/joint_model/checkpoint-837/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..101160d1586d76a59538897543f72c3a33c08339
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f36cc81c51fd8f3bd067fa3778ae226b48e70a0415d700f6b6a3ac320f64adef
+size 16389
diff --git a/joint_model/checkpoint-837/rng_state_3.pth b/joint_model/checkpoint-837/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..53194b3efbb3392a51eff68f23e37b910265d1c2
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8c9f4c96b25a679255a8a7863b55a726bcc322bda28bbb47d831c9d4e8d256f
+size 16389
diff --git a/joint_model/checkpoint-837/rng_state_4.pth b/joint_model/checkpoint-837/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..733a394f80b18de6d2fa66b537b0c3c3857ca822
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b351bd966dcca1cdb1afc30df6abd6eed5b37d1aef9513f4d4fb3ed55ba30d77
+size 16389
diff --git a/joint_model/checkpoint-837/rng_state_5.pth b/joint_model/checkpoint-837/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..26d4d91fc64649e60cdc752712ced3108fd2e884
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fde6d99daa5ee5f16382eff0274f36d4a3e66d2a19c4d93ac2583ec5d97145e
+size 16389
diff --git a/joint_model/checkpoint-837/rng_state_6.pth b/joint_model/checkpoint-837/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7dfb63f85419cd84c0be2cfc4a7c7dcd87dbe9d0
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:894daeb25623ff3f610f6dab0e5ad44aa5d790b0644a685341e951902321c585
+size 16389
diff --git a/joint_model/checkpoint-837/rng_state_7.pth b/joint_model/checkpoint-837/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f2b2d6e881524d6ac5bd96e4c9bc13109a8f820f
--- /dev/null
+++ b/joint_model/checkpoint-837/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9775aa1d74b9dd14e2c4b03adab017c7bce57f30059be797f865e39d506c1fe4
+size 16389
diff --git a/joint_model/checkpoint-837/scheduler.pt b/joint_model/checkpoint-837/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3fd22075828107167117ca734c13aae6730d3923
--- /dev/null
+++ b/joint_model/checkpoint-837/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad9651133913dbf6b81c34375b7481b50a2618f44ce0ab4329fe73988a369a5d
+size 1465
diff --git a/joint_model/checkpoint-837/sentencepiece.bpe.model b/joint_model/checkpoint-837/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/joint_model/checkpoint-837/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/joint_model/checkpoint-837/special_tokens_map.json b/joint_model/checkpoint-837/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/joint_model/checkpoint-837/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-837/tokenizer.json b/joint_model/checkpoint-837/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed
--- /dev/null
+++ b/joint_model/checkpoint-837/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
diff --git a/joint_model/checkpoint-837/tokenizer_config.json b/joint_model/checkpoint-837/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24
--- /dev/null
+++ b/joint_model/checkpoint-837/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 512,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/joint_model/checkpoint-837/trainer_state.json b/joint_model/checkpoint-837/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a21c1209e58f4e6482c896f428aeb016349350d
--- /dev/null
+++ b/joint_model/checkpoint-837/trainer_state.json
@@ -0,0 +1,77 @@
+{
+ "best_global_step": 837,
+ "best_metric": 0.3886409401893616,
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-837",
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 837,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy_level1": 0.943,
+ "eval_accuracy_level2": 0.8282,
+ "eval_f1_level1": 0.9424784900051851,
+ "eval_f1_level2": 0.7894811362618394,
+ "eval_loss": 1.1101479530334473,
+ "eval_runtime": 0.6677,
+ "eval_samples_per_second": 7488.47,
+ "eval_steps_per_second": 14.977,
+ "step": 279
+ },
+ {
+ "epoch": 1.7921146953405018,
+ "grad_norm": 8.197423934936523,
+ "learning_rate": 1.642293906810036e-05,
+ "loss": 2.029,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy_level1": 0.963,
+ "eval_accuracy_level2": 0.9134,
+ "eval_f1_level1": 0.962976281751424,
+ "eval_f1_level2": 0.9010101771001547,
+ "eval_loss": 0.5648184418678284,
+ "eval_runtime": 0.6225,
+ "eval_samples_per_second": 8032.157,
+ "eval_steps_per_second": 16.064,
+ "step": 558
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy_level1": 0.9684,
+ "eval_accuracy_level2": 0.9404,
+ "eval_f1_level1": 0.9683869415305786,
+ "eval_f1_level2": 0.9353991249189201,
+ "eval_loss": 0.3886409401893616,
+ "eval_runtime": 0.7756,
+ "eval_samples_per_second": 6447.003,
+ "eval_steps_per_second": 12.894,
+ "step": 837
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2790,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 2.2035438831140864e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/joint_model/checkpoint-837/training_args.bin b/joint_model/checkpoint-837/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f
--- /dev/null
+++ b/joint_model/checkpoint-837/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
+size 5777
diff --git a/joint_model/runs/Sep27_08-45-36_hanoi/events.out.tfevents.1758962744.hanoi.2933948.0 b/joint_model/runs/Sep27_08-45-36_hanoi/events.out.tfevents.1758962744.hanoi.2933948.0
new file mode 100644
index 0000000000000000000000000000000000000000..78c7da2cee63e4d18fb70d7316c234acee440466
--- /dev/null
+++ b/joint_model/runs/Sep27_08-45-36_hanoi/events.out.tfevents.1758962744.hanoi.2933948.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74dcc120d2317d5cdfbd2efe48a898320448ae22588348568c207ec24d601ab2
+size 4999
diff --git a/joint_model/runs/Sep27_08-48-29_hanoi/events.out.tfevents.1758962913.hanoi.2934734.0 b/joint_model/runs/Sep27_08-48-29_hanoi/events.out.tfevents.1758962913.hanoi.2934734.0
new file mode 100644
index 0000000000000000000000000000000000000000..2c6c07b53550ff7dc2d7109a8fba47c8044dcf64
--- /dev/null
+++ b/joint_model/runs/Sep27_08-48-29_hanoi/events.out.tfevents.1758962913.hanoi.2934734.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9edc8384af7bb48cff1e97e63340883da7f4f40e2e6f539e9eb5b5a1d30393a9
+size 40
diff --git a/joint_model/runs/Sep27_08-50-33_hanoi/events.out.tfevents.1758963043.hanoi.2936293.0 b/joint_model/runs/Sep27_08-50-33_hanoi/events.out.tfevents.1758963043.hanoi.2936293.0
new file mode 100644
index 0000000000000000000000000000000000000000..26916da1d4166a009305e796dc731fcd5187b301
--- /dev/null
+++ b/joint_model/runs/Sep27_08-50-33_hanoi/events.out.tfevents.1758963043.hanoi.2936293.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:622c63da7ca0df4b4192de7cbdf8c8dbb299daf82bf067ed95da90a6429dc32b
+size 11345
diff --git a/joint_model/runs/Sep27_16-43-47_hanoi/events.out.tfevents.1758991441.hanoi.2994230.0 b/joint_model/runs/Sep27_16-43-47_hanoi/events.out.tfevents.1758991441.hanoi.2994230.0
new file mode 100644
index 0000000000000000000000000000000000000000..510210de93267ba6991236bfa9600c8e5353a7d5
--- /dev/null
+++ b/joint_model/runs/Sep27_16-43-47_hanoi/events.out.tfevents.1758991441.hanoi.2994230.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11d45a0f0206b97817cef72b7b4a4e2ae36b869297b98c937a095819076206e5
+size 461
diff --git a/joint_model/runs/Sep27_16-44-42_hanoi/events.out.tfevents.1758991495.hanoi.2995062.0 b/joint_model/runs/Sep27_16-44-42_hanoi/events.out.tfevents.1758991495.hanoi.2995062.0
new file mode 100644
index 0000000000000000000000000000000000000000..32f4d58f82a764cf992d089bb11ecd0d210ff7df
--- /dev/null
+++ b/joint_model/runs/Sep27_16-44-42_hanoi/events.out.tfevents.1758991495.hanoi.2995062.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:988cc3d7efa4aba95f325a2a3f2608959ae5caac59028d608e210960886a2478
+size 461