diff --git a/.gitattributes b/.gitattributes index 80b5b77a5938402840a46182085e886f245a8d13..5c664143ac0587be17801213b4ed79d6cd48bc7e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -38,3 +38,13 @@ xlm/hierarchical_xlm_roberta/checkpoint-2688/tokenizer.json filter=lfs diff=lfs xlm/hierarchical_xlm_roberta/checkpoint-3584/tokenizer.json filter=lfs diff=lfs merge=lfs -text xlm/hierarchical_xlm_roberta/checkpoint-4480/tokenizer.json filter=lfs diff=lfs merge=lfs -text xlm/hierarchical_xlm_roberta/checkpoint-896/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-1116/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-1395/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-1674/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-1953/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-2232/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-2511/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-279/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-2790/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-558/tokenizer.json filter=lfs diff=lfs merge=lfs -text +joint_model/checkpoint-837/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/joint_model/checkpoint-1116/config.json b/joint_model/checkpoint-1116/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-1116/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-1116/model.safetensors b/joint_model/checkpoint-1116/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b36df5a8cedb22370fa9f5bcfe329405d7064c47 --- /dev/null +++ b/joint_model/checkpoint-1116/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6792b2f6f167bf98323526de92da30fc30176242f8e06b2af08a1f76528af6d2 +size 1112408092 diff --git a/joint_model/checkpoint-1116/optimizer.pt b/joint_model/checkpoint-1116/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffd7530abe63d88351e38ccd505a222528fdfc30 --- /dev/null +++ b/joint_model/checkpoint-1116/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fab47c4a460fe4a9c21057cf29f5557d19a2d723b628bc0898a267dadaa022 +size 2224937355 diff --git a/joint_model/checkpoint-1116/rng_state_0.pth b/joint_model/checkpoint-1116/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..370b1ba2fe6af728e764dc2c23d043a82ffcdd65 --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c2ca6cda30f0e67f92af2785341362f9fd75975cfa0e1e3edf170b31356982d +size 16389 diff --git a/joint_model/checkpoint-1116/rng_state_1.pth b/joint_model/checkpoint-1116/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..970ec8c78c6dd49742acc81896fd66a80e1a1fc4 --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e19c421d330efe795fe888269f2979bdecc155b8ec1bae695ab646f830eda58 +size 16389 diff --git a/joint_model/checkpoint-1116/rng_state_2.pth b/joint_model/checkpoint-1116/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..44fc11d15282c307b7963a6b6f491561455773dd --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4bfe1bdb6ffcc11de3dfa36f4f902a025948dd1a9d55f9e2a0c37a0c71d8993 +size 16389 diff --git a/joint_model/checkpoint-1116/rng_state_3.pth b/joint_model/checkpoint-1116/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6adc4374ed209813627ead664506f858bedddcb --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7db4f6f2db3a4f2891f0c6123c6790aa8182a24731540060e1c5162bdfcf332 +size 16389 diff --git a/joint_model/checkpoint-1116/rng_state_4.pth b/joint_model/checkpoint-1116/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..6846ade94d473376d4b16cc63ab21c879a5d4359 --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e325614869957231c07b72ed17ee0c0dec786269ff93abf17b16883c45761f74 +size 16389 diff --git a/joint_model/checkpoint-1116/rng_state_5.pth b/joint_model/checkpoint-1116/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..65d95e4b0202accfe850a9d9dd094963e3a7e18a --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06f2380c4f07e128133b104b687744df3b4064ecc4b332886d237fcca20b844 +size 16389 diff --git a/joint_model/checkpoint-1116/rng_state_6.pth b/joint_model/checkpoint-1116/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..39fa906a21713ad6721331651b3ce2d9bbcea060 --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6806143edb28cfaf20fbcd82eda60303d3570b7414ff8996f022cf2b33990496 +size 16389 diff --git a/joint_model/checkpoint-1116/rng_state_7.pth b/joint_model/checkpoint-1116/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1713a674f32dd13384652ba674cd527315f4dcb --- /dev/null +++ b/joint_model/checkpoint-1116/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5cdf368ea3798bd2e333443bb2d4214053bef95ce8af4541fba9630a6adda4 +size 16389 diff --git a/joint_model/checkpoint-1116/scheduler.pt b/joint_model/checkpoint-1116/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab27d781542a9529f435bad1d10c56162329301f --- /dev/null +++ b/joint_model/checkpoint-1116/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d771b00dd64d2f869efc678cde233c82e3d5d80f32fc0afc37d60c16f5276a7 +size 1465 diff --git a/joint_model/checkpoint-1116/sentencepiece.bpe.model b/joint_model/checkpoint-1116/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-1116/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-1116/special_tokens_map.json b/joint_model/checkpoint-1116/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-1116/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1116/tokenizer.json b/joint_model/checkpoint-1116/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-1116/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-1116/tokenizer_config.json b/joint_model/checkpoint-1116/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-1116/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1116/trainer_state.json b/joint_model/checkpoint-1116/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..76b544ae3abaf6b1a2481ed8bf5f6784de9904f9 --- /dev/null +++ b/joint_model/checkpoint-1116/trainer_state.json @@ -0,0 +1,96 @@ +{ + "best_global_step": 1116, + "best_metric": 0.3113965690135956, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1116", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 1116, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + }, + { + "epoch": 3.5842293906810037, + "grad_norm": 9.112466812133789, + "learning_rate": 1.2838709677419356e-05, + "loss": 0.5009, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_accuracy_level1": 0.976, + "eval_accuracy_level2": 0.948, + "eval_f1_level1": 0.9759892345948809, + "eval_f1_level2": 0.944852130743217, + "eval_loss": 0.3113965690135956, + "eval_runtime": 0.7752, + "eval_samples_per_second": 6449.701, + "eval_steps_per_second": 12.899, + "step": 1116 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9380585063448576e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-1116/training_args.bin b/joint_model/checkpoint-1116/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-1116/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-1395/config.json b/joint_model/checkpoint-1395/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-1395/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-1395/model.safetensors b/joint_model/checkpoint-1395/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c2046e7c56b89501b3dbfd8dbfb49079f719a21 --- /dev/null +++ b/joint_model/checkpoint-1395/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41e0216d9d3862040536a7fd9ba6b254a014777b0d7f1662f492db7a971f406 +size 1112408092 diff --git a/joint_model/checkpoint-1395/optimizer.pt b/joint_model/checkpoint-1395/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4963aef43b876bb8209e81ffbe6c1155904c2d54 --- /dev/null +++ b/joint_model/checkpoint-1395/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5607b6daf0f0a370ccef9270477c2e9a830246c8af0dd0c7fe8549c739d2e9ac +size 2224937355 diff --git a/joint_model/checkpoint-1395/rng_state_0.pth b/joint_model/checkpoint-1395/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..da320c884b910e7ee987fb46b56843fd158772ba --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:372eabe9ee1b698bd155ed64252bb52ecd85f363df08d4a6c4512f0e000cb9b7 +size 16389 diff --git a/joint_model/checkpoint-1395/rng_state_1.pth b/joint_model/checkpoint-1395/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..35e2079094ce2616873e376941a9ecb592824a49 --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d00e489391354b35849c16d9fa756e7012f5711d57ab1683f71c55ef187b9dd2 +size 16389 diff --git a/joint_model/checkpoint-1395/rng_state_2.pth b/joint_model/checkpoint-1395/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..84bdd9bbd5e30bd20d61107e4eecc9016a4b9b85 --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0cf5772222d7ef83bf5bfa008bd16ddb5100e0b47d95a97be765c501e4e3cc +size 16389 diff --git a/joint_model/checkpoint-1395/rng_state_3.pth b/joint_model/checkpoint-1395/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3775bf1aa145c7c637b56d33e846cbb00310434d --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622b3cd66dcaf83e587a5e0c31a6797932cb588d1367575466e0b8eac0b6b732 +size 16389 diff --git a/joint_model/checkpoint-1395/rng_state_4.pth b/joint_model/checkpoint-1395/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..34c25950706f8efe2fd0069421c3fe56519b1ea0 --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfb97eabb2e40d9e6d49009ad4b964b2c73fa42fbf32c8b1f59ae56bf87d92e +size 16389 diff --git a/joint_model/checkpoint-1395/rng_state_5.pth b/joint_model/checkpoint-1395/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..739bd65f6c2f4a85c04b39f8b84d7d1b3ade7c42 --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2e0af24431f9c87afbbee452b8c4cb68e55978cc475aca99862285217c6f8a +size 16389 diff --git a/joint_model/checkpoint-1395/rng_state_6.pth b/joint_model/checkpoint-1395/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..da8d5195bf50293f618fbd821a282240b2ea47b3 --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c209482d19626584d72a324c23675cfbc298544cbd62b33073d59f67aa1d16e +size 16389 diff --git a/joint_model/checkpoint-1395/rng_state_7.pth b/joint_model/checkpoint-1395/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..3642f908d293393704d51779169855cb1607d60b --- /dev/null +++ b/joint_model/checkpoint-1395/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1547a4b17eaa25162d8c2232659379d5dc26ac63c151280e984e29b60718d591 +size 16389 diff --git a/joint_model/checkpoint-1395/scheduler.pt b/joint_model/checkpoint-1395/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db3b1d5513e6f0855e1efda8871b357b2bbcda27 --- /dev/null +++ b/joint_model/checkpoint-1395/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9f1682436aaf9d161d72dc7e582154d9fbe0f817b46fd9cdc83dc2d8f4ef8f +size 1465 diff --git a/joint_model/checkpoint-1395/sentencepiece.bpe.model b/joint_model/checkpoint-1395/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-1395/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-1395/special_tokens_map.json b/joint_model/checkpoint-1395/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-1395/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1395/tokenizer.json b/joint_model/checkpoint-1395/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-1395/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-1395/tokenizer_config.json b/joint_model/checkpoint-1395/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-1395/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1395/trainer_state.json b/joint_model/checkpoint-1395/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1f28e63598c58f3bc4405df20e259f7eb016acf2 --- /dev/null +++ b/joint_model/checkpoint-1395/trainer_state.json @@ -0,0 +1,108 @@ +{ + "best_global_step": 1395, + "best_metric": 0.2682338356971741, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1395", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1395, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + }, + { + "epoch": 3.5842293906810037, + "grad_norm": 9.112466812133789, + "learning_rate": 1.2838709677419356e-05, + "loss": 0.5009, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_accuracy_level1": 0.976, + "eval_accuracy_level2": 0.948, + "eval_f1_level1": 0.9759892345948809, + "eval_f1_level2": 0.944852130743217, + "eval_loss": 0.3113965690135956, + "eval_runtime": 0.7752, + "eval_samples_per_second": 6449.701, + "eval_steps_per_second": 12.899, + "step": 1116 + }, + { + "epoch": 5.0, + "eval_accuracy_level1": 0.9786, + "eval_accuracy_level2": 0.9574, + "eval_f1_level1": 0.9786086703124734, + "eval_f1_level2": 0.9558414971682437, + "eval_loss": 0.2682338356971741, + "eval_runtime": 0.7265, + "eval_samples_per_second": 6882.558, + "eval_steps_per_second": 13.765, + "step": 1395 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.672573129575629e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-1395/training_args.bin b/joint_model/checkpoint-1395/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-1395/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-1674/config.json b/joint_model/checkpoint-1674/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-1674/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-1674/model.safetensors b/joint_model/checkpoint-1674/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a997964889512431292718f1ff67f0006b63ddb5 --- /dev/null +++ b/joint_model/checkpoint-1674/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07732aadf5ff31e4f11ced15e3b3f675dcec364a1c0b188e03180a6719bbc0bc +size 1112408092 diff --git a/joint_model/checkpoint-1674/optimizer.pt b/joint_model/checkpoint-1674/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c09f59af2e43004188e0734d98115af85306c60f --- /dev/null +++ b/joint_model/checkpoint-1674/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c772fa9b4c710c806b65a2db5c8842dd78cbf61f45a0b88ae50f5bedc42508d0 +size 2224937355 diff --git a/joint_model/checkpoint-1674/rng_state_0.pth b/joint_model/checkpoint-1674/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c5b089603c6bed48b4f1765cdc060e30df0d96e --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244fd61bffb73015ca283aaf0b690244b4bf656c6c488f789153206cc5ca419c +size 16389 diff --git a/joint_model/checkpoint-1674/rng_state_1.pth b/joint_model/checkpoint-1674/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..35b663e5109c96433353341cbcebbb9cfa733d71 --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85bf43502cd59c3e7002fdd28d84f2755d1d50b9f8395ece5cce57a33e6a2de +size 16389 diff --git a/joint_model/checkpoint-1674/rng_state_2.pth b/joint_model/checkpoint-1674/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f3e31ee1ff633db8b6bb880049319c3207f67964 --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f36f28619c5d75933e40cf2ed1e4be028945b0f4185cc9e11476f543d2e8d7c +size 16389 diff --git a/joint_model/checkpoint-1674/rng_state_3.pth b/joint_model/checkpoint-1674/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fe3ac6db09cda87bd31d2e4ec0716a985746e95 --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b838f4cf0f902e11d576f4b832468a92c12d05620e73c537a7302c86e09c2752 +size 16389 diff --git a/joint_model/checkpoint-1674/rng_state_4.pth b/joint_model/checkpoint-1674/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..383ae6b5b82ac4453d4ca1b20354b92421e9c690 --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c946fcb7164ab82957cc2fd2229fec1ea962b6eb3608757ad9302a5956a5782 +size 16389 diff --git a/joint_model/checkpoint-1674/rng_state_5.pth b/joint_model/checkpoint-1674/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff350b2c79149fd18f8cd8b2c243385d2382b2d5 --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1498a3eb9fd6a568db26513c62c91ef104aa0da2637df120f18cbd8604a6fefc +size 16389 diff --git a/joint_model/checkpoint-1674/rng_state_6.pth b/joint_model/checkpoint-1674/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b9dd031fec5e31d58a482247d8a2a3305f962dd --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f449c8299ada96be66c0e08d0603887c01bcef56d3f3c2bf63a0fc8a43664aa +size 16389 diff --git a/joint_model/checkpoint-1674/rng_state_7.pth b/joint_model/checkpoint-1674/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..6dbef8d8c724d316bf25504254d02169bb817a41 --- /dev/null +++ b/joint_model/checkpoint-1674/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2a661a72b9f31cca93340f54a4c320474844987c784955cd6cf6c6f2d2cd65 +size 16389 diff --git a/joint_model/checkpoint-1674/scheduler.pt b/joint_model/checkpoint-1674/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..064005b649997d8373a278bbbb208eb738923aef --- /dev/null +++ b/joint_model/checkpoint-1674/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9460e5ee67dc8bfa729d93fdbc410d0dd92c60badc5c6eac4d11137e2d9011 +size 1465 diff --git a/joint_model/checkpoint-1674/sentencepiece.bpe.model b/joint_model/checkpoint-1674/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-1674/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-1674/special_tokens_map.json b/joint_model/checkpoint-1674/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-1674/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1674/tokenizer.json b/joint_model/checkpoint-1674/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-1674/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-1674/tokenizer_config.json b/joint_model/checkpoint-1674/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-1674/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1674/trainer_state.json b/joint_model/checkpoint-1674/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..576b9fc95ea9680896ce0940eb0185a7c41e6081 --- /dev/null +++ b/joint_model/checkpoint-1674/trainer_state.json @@ -0,0 +1,127 @@ +{ + "best_global_step": 1674, + "best_metric": 0.2282806634902954, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1674", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 1674, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + }, + { + "epoch": 3.5842293906810037, + "grad_norm": 9.112466812133789, + "learning_rate": 1.2838709677419356e-05, + "loss": 0.5009, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_accuracy_level1": 0.976, + "eval_accuracy_level2": 0.948, + "eval_f1_level1": 0.9759892345948809, + "eval_f1_level2": 0.944852130743217, + "eval_loss": 0.3113965690135956, + "eval_runtime": 0.7752, + "eval_samples_per_second": 6449.701, + "eval_steps_per_second": 12.899, + "step": 1116 + }, + { + "epoch": 5.0, + "eval_accuracy_level1": 0.9786, + "eval_accuracy_level2": 0.9574, + "eval_f1_level1": 0.9786086703124734, + "eval_f1_level2": 0.9558414971682437, + "eval_loss": 0.2682338356971741, + "eval_runtime": 0.7265, + "eval_samples_per_second": 6882.558, + "eval_steps_per_second": 13.765, + "step": 1395 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 5.6989006996154785, + "learning_rate": 9.254480286738352e-06, + "loss": 0.2818, + "step": 1500 + }, + { + "epoch": 6.0, + "eval_accuracy_level1": 0.9804, + "eval_accuracy_level2": 0.9642, + "eval_f1_level1": 0.9803982577567285, + "eval_f1_level2": 0.9638909785156317, + "eval_loss": 0.2282806634902954, + "eval_runtime": 0.5242, + "eval_samples_per_second": 9538.102, + "eval_steps_per_second": 19.076, + "step": 1674 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.407087779649946e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-1674/training_args.bin b/joint_model/checkpoint-1674/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-1674/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-1953/config.json b/joint_model/checkpoint-1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-1953/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-1953/model.safetensors b/joint_model/checkpoint-1953/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5485e04bd7b63127d59a7bd6f54179feb8def90 --- /dev/null +++ b/joint_model/checkpoint-1953/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4a7cf70e775f9af2c6e7bfe2b1d85e2ac707cca504fbcf90f2b8eab888da6c +size 1112408092 diff --git a/joint_model/checkpoint-1953/optimizer.pt b/joint_model/checkpoint-1953/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..22e3d7c80a42f9b7918c97eba412c33d7ef4ecb6 --- /dev/null +++ b/joint_model/checkpoint-1953/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f5db5ddff0f46638f8ffcd7a3348a901dc2e59319f2dad9e72c0b43e1cfb14 +size 2224937355 diff --git a/joint_model/checkpoint-1953/rng_state_0.pth b/joint_model/checkpoint-1953/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..097ae6c2646458060a404e3bea1b06745616853e --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7419b306e44cd29b99927850e0762e6cedc8e78103e4ee470068182e831ec9 +size 16389 diff --git a/joint_model/checkpoint-1953/rng_state_1.pth b/joint_model/checkpoint-1953/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..a10c34b9a07acca7ab91be6a6664d7be31c99f5b --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7c126631ead2a377084a6afb17c9a6517adce641f074de6a2da6615bc1cec4 +size 16389 diff --git a/joint_model/checkpoint-1953/rng_state_2.pth b/joint_model/checkpoint-1953/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7be8101cdb047773d47ff0dc0ca1a7347b3488bd --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50796af5be06a84e49c3af4c44824688014982d1b23fc31209ffbdadef38568c +size 16389 diff --git a/joint_model/checkpoint-1953/rng_state_3.pth b/joint_model/checkpoint-1953/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..822e20adcb64ffe68de49612c395f5ce6935b200 --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cda81dbdc54a37b3833a921f18435947d7bd013b96c41cea0c164113211deb +size 16389 diff --git a/joint_model/checkpoint-1953/rng_state_4.pth b/joint_model/checkpoint-1953/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..85da0c7a134eed822359cba8c14ef402ce730701 --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5149428d8ee6900fe8130620cadd9eb6d9d978b5b4186595866121d9525f3379 +size 16389 diff --git a/joint_model/checkpoint-1953/rng_state_5.pth b/joint_model/checkpoint-1953/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..15ec9a3284b8ac4784590de13799f4218eb4abf3 --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:facb048b78a826056bc8bc47acf1a11f9ebcd8c625072a65a9bee9aee2cbbcbf +size 16389 diff --git a/joint_model/checkpoint-1953/rng_state_6.pth b/joint_model/checkpoint-1953/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..f06cbfd938841ffc0ab3cb2e39f967b2b2732f86 --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e434b51619e210224fcd0175caee94b66a555a4e3dc46a4c661efe095b7c6aea +size 16389 diff --git a/joint_model/checkpoint-1953/rng_state_7.pth b/joint_model/checkpoint-1953/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bb30ddffa9cd236687f556fa691307d1a94f291d --- /dev/null +++ b/joint_model/checkpoint-1953/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc01d772e9ab257c6d19c7045e95189649fd957d32d97c5e18ce514bfb85c56b +size 16389 diff --git a/joint_model/checkpoint-1953/scheduler.pt b/joint_model/checkpoint-1953/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f53b16cfb4ea8185895ba465248031f8a970d08 --- /dev/null +++ b/joint_model/checkpoint-1953/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1510e7a38b890586532c1cb5fa88825d9a7ec78a98c988a18d6545b1b548c37 +size 1465 diff --git a/joint_model/checkpoint-1953/sentencepiece.bpe.model b/joint_model/checkpoint-1953/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-1953/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-1953/special_tokens_map.json b/joint_model/checkpoint-1953/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-1953/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1953/tokenizer.json b/joint_model/checkpoint-1953/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-1953/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-1953/tokenizer_config.json b/joint_model/checkpoint-1953/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-1953/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-1953/trainer_state.json b/joint_model/checkpoint-1953/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..74468f58bb509f93a6f7247345e9bf1f95cc81fa --- /dev/null +++ b/joint_model/checkpoint-1953/trainer_state.json @@ -0,0 +1,139 @@ +{ + "best_global_step": 1953, + "best_metric": 0.2116653025150299, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1953", + "epoch": 7.0, + "eval_steps": 500, + "global_step": 1953, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + }, + { + "epoch": 3.5842293906810037, + "grad_norm": 9.112466812133789, + "learning_rate": 1.2838709677419356e-05, + "loss": 0.5009, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_accuracy_level1": 0.976, + "eval_accuracy_level2": 0.948, + "eval_f1_level1": 0.9759892345948809, + "eval_f1_level2": 0.944852130743217, + "eval_loss": 0.3113965690135956, + "eval_runtime": 0.7752, + "eval_samples_per_second": 6449.701, + "eval_steps_per_second": 12.899, + "step": 1116 + }, + { + "epoch": 5.0, + "eval_accuracy_level1": 0.9786, + "eval_accuracy_level2": 0.9574, + "eval_f1_level1": 0.9786086703124734, + "eval_f1_level2": 0.9558414971682437, + "eval_loss": 0.2682338356971741, + "eval_runtime": 0.7265, + "eval_samples_per_second": 6882.558, + "eval_steps_per_second": 13.765, + "step": 1395 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 5.6989006996154785, + "learning_rate": 9.254480286738352e-06, + "loss": 0.2818, + "step": 1500 + }, + { + "epoch": 6.0, + "eval_accuracy_level1": 0.9804, + "eval_accuracy_level2": 0.9642, + "eval_f1_level1": 0.9803982577567285, + "eval_f1_level2": 0.9638909785156317, + "eval_loss": 0.2282806634902954, + "eval_runtime": 0.5242, + "eval_samples_per_second": 9538.102, + "eval_steps_per_second": 19.076, + "step": 1674 + }, + { + "epoch": 7.0, + "eval_accuracy_level1": 0.9824, + "eval_accuracy_level2": 0.9664, + "eval_f1_level1": 0.9823983306464108, + "eval_f1_level2": 0.966154693269809, + "eval_loss": 0.2116653025150299, + "eval_runtime": 0.4889, + "eval_samples_per_second": 10226.528, + "eval_steps_per_second": 20.453, + "step": 1953 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.141602402880717e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-1953/training_args.bin b/joint_model/checkpoint-1953/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-1953/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-2232/config.json b/joint_model/checkpoint-2232/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-2232/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-2232/model.safetensors b/joint_model/checkpoint-2232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b29bd8b2544202a153a4b162d7148bbb5b40d60 --- /dev/null +++ b/joint_model/checkpoint-2232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f2e64493da2c4a5265ba172fbc217c41a49ae42cdf8910d07ea3db2c4f3221b +size 1112408092 diff --git a/joint_model/checkpoint-2232/optimizer.pt b/joint_model/checkpoint-2232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..24f0b08945bbc24ca418a1b30e57554970e44568 --- /dev/null +++ b/joint_model/checkpoint-2232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63178dd3e9619750df7cb2c82453b9a1cd61e41545953b97c7e1c3f86a4c4cc +size 2224937355 diff --git a/joint_model/checkpoint-2232/rng_state_0.pth b/joint_model/checkpoint-2232/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..aaf1830e296a77d397e07fd1bc06f5bbb4cc1f29 --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab41ef9c0e4a88cfd3ffc22fae20763c1be99c9635ddf59900314b6cf68124f +size 16389 diff --git a/joint_model/checkpoint-2232/rng_state_1.pth b/joint_model/checkpoint-2232/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..a55141c3022fe6ce66dbb82049f343399de15284 --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d03ccfd914586b01f981cc8eca5be1052848977ebd9804e886146d0d91a735 +size 16389 diff --git a/joint_model/checkpoint-2232/rng_state_2.pth b/joint_model/checkpoint-2232/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f240159ad3aeb7b9854160d91d86e7fae7976a49 --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d4994f524975ea9f9c147a27b7717c4cba700985878d76a0aa96bc52790e62 +size 16389 diff --git a/joint_model/checkpoint-2232/rng_state_3.pth b/joint_model/checkpoint-2232/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc0c4c8843453647fe03f639d755ba7d0cfdc973 --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9420abfb9dcc1ae0d2ad1549454098fe9f673c85855e8857b7d340dbe4c1895c +size 16389 diff --git a/joint_model/checkpoint-2232/rng_state_4.pth b/joint_model/checkpoint-2232/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd23027d663c36a63327eb76e3d9a1ec33bef33d --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77c2911a1fa1ff01d875bd3b1554286911c193b6e3302c81dcb970e5796879e +size 16389 diff --git a/joint_model/checkpoint-2232/rng_state_5.pth b/joint_model/checkpoint-2232/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..193400054dd0e7979768a6202b0019542a69ad63 --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93c4d2a90bba85d3c678944bc6e06bb3a24a4766908921a43290f4f9615c7da +size 16389 diff --git a/joint_model/checkpoint-2232/rng_state_6.pth b/joint_model/checkpoint-2232/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..edc588358a6e5c64701a7740c420bfdedd8f42d8 --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b4e921e87cd188b7e4483a1ebd9a4725ce1a9e4efef978c3c6878739fb28fd +size 16389 diff --git a/joint_model/checkpoint-2232/rng_state_7.pth b/joint_model/checkpoint-2232/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a44733e2dda0da1b9233d79f4b9649e52b8c8ec9 --- /dev/null +++ b/joint_model/checkpoint-2232/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7628dc2de271047f4382d05b7cb261ca1bc8dbfa648ad5b59faaf19b3c5e3d15 +size 16389 diff --git a/joint_model/checkpoint-2232/scheduler.pt b/joint_model/checkpoint-2232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72635c3631850f2f6fe792e5d85960886879ef49 --- /dev/null +++ b/joint_model/checkpoint-2232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fedf9863e48d7786659fe61566f5ceff4fd963097de5304c334acc258fc3d82d +size 1465 diff --git a/joint_model/checkpoint-2232/sentencepiece.bpe.model b/joint_model/checkpoint-2232/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-2232/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-2232/special_tokens_map.json b/joint_model/checkpoint-2232/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-2232/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-2232/tokenizer.json b/joint_model/checkpoint-2232/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-2232/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-2232/tokenizer_config.json b/joint_model/checkpoint-2232/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-2232/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-2232/trainer_state.json b/joint_model/checkpoint-2232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d6361bdbc1b1b0bf58cc943470bd9f9af93e4bcb --- /dev/null +++ b/joint_model/checkpoint-2232/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_global_step": 2232, + "best_metric": 0.20548628270626068, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-2232", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 2232, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + }, + { + "epoch": 3.5842293906810037, + "grad_norm": 9.112466812133789, + "learning_rate": 1.2838709677419356e-05, + "loss": 0.5009, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_accuracy_level1": 0.976, + "eval_accuracy_level2": 0.948, + "eval_f1_level1": 0.9759892345948809, + "eval_f1_level2": 0.944852130743217, + "eval_loss": 0.3113965690135956, + "eval_runtime": 0.7752, + "eval_samples_per_second": 6449.701, + "eval_steps_per_second": 12.899, + "step": 1116 + }, + { + "epoch": 5.0, + "eval_accuracy_level1": 0.9786, + "eval_accuracy_level2": 0.9574, + "eval_f1_level1": 0.9786086703124734, + "eval_f1_level2": 0.9558414971682437, + "eval_loss": 0.2682338356971741, + "eval_runtime": 0.7265, + "eval_samples_per_second": 6882.558, + "eval_steps_per_second": 13.765, + "step": 1395 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 5.6989006996154785, + "learning_rate": 9.254480286738352e-06, + "loss": 0.2818, + "step": 1500 + }, + { + "epoch": 6.0, + "eval_accuracy_level1": 0.9804, + "eval_accuracy_level2": 0.9642, + "eval_f1_level1": 0.9803982577567285, + "eval_f1_level2": 0.9638909785156317, + "eval_loss": 0.2282806634902954, + "eval_runtime": 0.5242, + "eval_samples_per_second": 9538.102, + "eval_steps_per_second": 19.076, + "step": 1674 + }, + { + "epoch": 7.0, + "eval_accuracy_level1": 0.9824, + "eval_accuracy_level2": 0.9664, + "eval_f1_level1": 0.9823983306464108, + "eval_f1_level2": 0.966154693269809, + "eval_loss": 0.2116653025150299, + "eval_runtime": 0.4889, + "eval_samples_per_second": 10226.528, + "eval_steps_per_second": 20.453, + "step": 1953 + }, + { + "epoch": 7.168458781362007, + "grad_norm": 7.060131072998047, + "learning_rate": 5.670250896057348e-06, + "loss": 0.1994, + "step": 2000 + }, + { + "epoch": 8.0, + "eval_accuracy_level1": 0.9826, + "eval_accuracy_level2": 0.969, + "eval_f1_level1": 0.9826127380767093, + "eval_f1_level2": 0.9688130224440087, + "eval_loss": 0.20548628270626068, + "eval_runtime": 0.5897, + "eval_samples_per_second": 8478.651, + "eval_steps_per_second": 16.957, + "step": 2232 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.876117039533261e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-2232/training_args.bin b/joint_model/checkpoint-2232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-2232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-2511/config.json b/joint_model/checkpoint-2511/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-2511/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-2511/model.safetensors b/joint_model/checkpoint-2511/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..126da99d20433885933fe3030f0db43d4a08451b --- /dev/null +++ b/joint_model/checkpoint-2511/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58a75dd0fb82a6466f8c24a7e148d1865aeb496457e2e915fbedf90ab56337c +size 1112408092 diff --git a/joint_model/checkpoint-2511/optimizer.pt b/joint_model/checkpoint-2511/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..15a59810d5bd62ab00128869dcc3a0bcc517aae7 --- /dev/null +++ b/joint_model/checkpoint-2511/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e17e16d6ec1e2dbc9f1335e801182aa45c918f19e1ee4d04f9ea9063cfb0228 +size 2224937355 diff --git a/joint_model/checkpoint-2511/rng_state_0.pth b/joint_model/checkpoint-2511/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..b4820f118dae3ac7345b9b4ab4959e9f90b24c24 --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd5cb87ba9a49bfdede4fd7017cfe7aa4a16704948a38db3b45a4435b088d63 +size 16389 diff --git a/joint_model/checkpoint-2511/rng_state_1.pth b/joint_model/checkpoint-2511/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..17ae64b9a005901030ac3f456a231ed7bedf7cd8 --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bceb63645c6a72c5a3a14d7d8f53e4a57975cfa318ce618aa0364fa2fa178e +size 16389 diff --git a/joint_model/checkpoint-2511/rng_state_2.pth b/joint_model/checkpoint-2511/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..33629e7e4d47b1086848f31975a777515dd794fa --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e26d53ef797d2b8afd11fa8018f8a2a18275514a7db4fa0809bd2e64b694ca40 +size 16389 diff --git a/joint_model/checkpoint-2511/rng_state_3.pth b/joint_model/checkpoint-2511/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..28e1f268c2c22ab86b98762ee7c2ec73ceaf25d5 --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d8277c6e380faee00d4c15a1df8d26aeb12735792bf3dc07300e1eebf34e11 +size 16389 diff --git a/joint_model/checkpoint-2511/rng_state_4.pth b/joint_model/checkpoint-2511/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..a42bfc01108f4a0222e35259ba90400a46d51e5c --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a66ef23029bb31289cbf241042e70cec294b9e8377e3e81b6d32a4262ec5470 +size 16389 diff --git a/joint_model/checkpoint-2511/rng_state_5.pth b/joint_model/checkpoint-2511/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0eac2ec1cb33dd7ddec47ac06ae363e9cb594cd5 --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd7a02720f74c54a892bd969f94dd8a735232108c002ca73baccd6a22ff4736 +size 16389 diff --git a/joint_model/checkpoint-2511/rng_state_6.pth b/joint_model/checkpoint-2511/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..88f1cc8fe6207405dabcfde23c70eecdccfef402 --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2584a385a3872614bcaf9f2ca9398b4492141d0b4a293fecac27f643fb7d258 +size 16389 diff --git a/joint_model/checkpoint-2511/rng_state_7.pth b/joint_model/checkpoint-2511/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..83736f35497a03caff8739f673b0d83e50664511 --- /dev/null +++ b/joint_model/checkpoint-2511/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4c640c23994e085f14f83dccc0fca6c13410ca57c2c31b7acb350abf0bcae5 +size 16389 diff --git a/joint_model/checkpoint-2511/scheduler.pt b/joint_model/checkpoint-2511/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c14d72a2b24b39ade872c412fdb3bb03167acbd --- /dev/null +++ b/joint_model/checkpoint-2511/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787980f142eded8d1cd676805f377fbdccd4c80688bd8d0f341162b7115eab43 +size 1465 diff --git a/joint_model/checkpoint-2511/sentencepiece.bpe.model b/joint_model/checkpoint-2511/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-2511/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-2511/special_tokens_map.json b/joint_model/checkpoint-2511/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-2511/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-2511/tokenizer.json b/joint_model/checkpoint-2511/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-2511/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-2511/tokenizer_config.json b/joint_model/checkpoint-2511/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-2511/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-2511/trainer_state.json b/joint_model/checkpoint-2511/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e4393486d07a998f376f0f971d78831587db837c --- /dev/null +++ b/joint_model/checkpoint-2511/trainer_state.json @@ -0,0 +1,177 @@ +{ + "best_global_step": 2511, + "best_metric": 0.20010386407375336, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-2511", + "epoch": 9.0, + "eval_steps": 500, + "global_step": 2511, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + }, + { + "epoch": 3.5842293906810037, + "grad_norm": 9.112466812133789, + "learning_rate": 1.2838709677419356e-05, + "loss": 0.5009, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_accuracy_level1": 0.976, + "eval_accuracy_level2": 0.948, + "eval_f1_level1": 0.9759892345948809, + "eval_f1_level2": 0.944852130743217, + "eval_loss": 0.3113965690135956, + "eval_runtime": 0.7752, + "eval_samples_per_second": 6449.701, + "eval_steps_per_second": 12.899, + "step": 1116 + }, + { + "epoch": 5.0, + "eval_accuracy_level1": 0.9786, + "eval_accuracy_level2": 0.9574, + "eval_f1_level1": 0.9786086703124734, + "eval_f1_level2": 0.9558414971682437, + "eval_loss": 0.2682338356971741, + "eval_runtime": 0.7265, + "eval_samples_per_second": 6882.558, + "eval_steps_per_second": 13.765, + "step": 1395 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 5.6989006996154785, + "learning_rate": 9.254480286738352e-06, + "loss": 0.2818, + "step": 1500 + }, + { + "epoch": 6.0, + "eval_accuracy_level1": 0.9804, + "eval_accuracy_level2": 0.9642, + "eval_f1_level1": 0.9803982577567285, + "eval_f1_level2": 0.9638909785156317, + "eval_loss": 0.2282806634902954, + "eval_runtime": 0.5242, + "eval_samples_per_second": 9538.102, + "eval_steps_per_second": 19.076, + "step": 1674 + }, + { + "epoch": 7.0, + "eval_accuracy_level1": 0.9824, + "eval_accuracy_level2": 0.9664, + "eval_f1_level1": 0.9823983306464108, + "eval_f1_level2": 0.966154693269809, + "eval_loss": 0.2116653025150299, + "eval_runtime": 0.4889, + "eval_samples_per_second": 10226.528, + "eval_steps_per_second": 20.453, + "step": 1953 + }, + { + "epoch": 7.168458781362007, + "grad_norm": 7.060131072998047, + "learning_rate": 5.670250896057348e-06, + "loss": 0.1994, + "step": 2000 + }, + { + "epoch": 8.0, + "eval_accuracy_level1": 0.9826, + "eval_accuracy_level2": 0.969, + "eval_f1_level1": 0.9826127380767093, + "eval_f1_level2": 0.9688130224440087, + "eval_loss": 0.20548628270626068, + "eval_runtime": 0.5897, + "eval_samples_per_second": 8478.651, + "eval_steps_per_second": 16.957, + "step": 2232 + }, + { + "epoch": 8.960573476702509, + "grad_norm": 9.435193061828613, + "learning_rate": 2.086021505376344e-06, + "loss": 0.1544, + "step": 2500 + }, + { + "epoch": 9.0, + "eval_accuracy_level1": 0.9836, + "eval_accuracy_level2": 0.9706, + "eval_f1_level1": 0.9836193977032266, + "eval_f1_level2": 0.9704863338256707, + "eval_loss": 0.20010386407375336, + "eval_runtime": 0.7263, + "eval_samples_per_second": 6884.526, + "eval_steps_per_second": 13.769, + "step": 2511 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.610631666119475e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-2511/training_args.bin b/joint_model/checkpoint-2511/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-2511/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-279/config.json b/joint_model/checkpoint-279/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-279/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-279/model.safetensors b/joint_model/checkpoint-279/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7282703f4af5af18dbb4cfd9592c2e7c43f57848 --- /dev/null +++ b/joint_model/checkpoint-279/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37e2adab4b3aa29e0440ee6529e7e8e4006d09f3192695d2b51d3cc516e14e4b +size 1112408092 diff --git a/joint_model/checkpoint-279/optimizer.pt b/joint_model/checkpoint-279/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..031bb3dee1f2fe42c40c67ad0bff5534e13e5495 --- /dev/null +++ b/joint_model/checkpoint-279/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445bbf18cdcb44b6eb39417a6938c95fd7daf5ef4c088862f8867f3d5c4190cf +size 2224937355 diff --git a/joint_model/checkpoint-279/rng_state_0.pth b/joint_model/checkpoint-279/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf445bc77857d4c2335a113c5cc9afe573264d4e --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64d1373a611e74f2173b6b99973eaf69537d8506d436ec8e98d7b7edb7a2a9b +size 16389 diff --git a/joint_model/checkpoint-279/rng_state_1.pth b/joint_model/checkpoint-279/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef5aa6bd7e461a5c465001960a75fd47ff24f488 --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:307e59cec4aa1182c93ae2ab81253ec46d882c9111ee0deece55210ef1b9c09d +size 16389 diff --git a/joint_model/checkpoint-279/rng_state_2.pth b/joint_model/checkpoint-279/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..12bd35ec04b2466e17f936c746665e5bc30c6584 --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9086f7ab23e0beab55e7b326ea890b64daea93653f3b7fe182982f2142e647 +size 16389 diff --git a/joint_model/checkpoint-279/rng_state_3.pth b/joint_model/checkpoint-279/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..42faf6dca4f850042ddef7d67ed10f705190525b --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3635d4bd8da07c184a0120bba6b1421f449002105c709301bfef0cae3d8d8773 +size 16389 diff --git a/joint_model/checkpoint-279/rng_state_4.pth b/joint_model/checkpoint-279/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..db77c56f37de7634511dc3b2f5552448dad4783b --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080ca24393ce589e8fbc32ddb1baf7115949b3cda7f562dbed8e243014e45170 +size 16389 diff --git a/joint_model/checkpoint-279/rng_state_5.pth b/joint_model/checkpoint-279/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..62a174fe3f9979f6493bf5057243a070d55991f3 --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d05bb7ba0220bf8a5afe1ddf326b9c098a4ef4ba5b95ad6a98af48593e6afe2 +size 16389 diff --git a/joint_model/checkpoint-279/rng_state_6.pth b/joint_model/checkpoint-279/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..46b93989a2d5d616316700b6099ba1cb584536db --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59383333e0ddfb6ba4803fae0ef2a9832b5b186ae5bce3132326b0dbca291894 +size 16389 diff --git a/joint_model/checkpoint-279/rng_state_7.pth b/joint_model/checkpoint-279/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d2c04235046272a96fdb3a37f3613d68ae86b4a --- /dev/null +++ b/joint_model/checkpoint-279/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3254021ab22193bf6b783682ec194469d102dac3e5e70b5426541d6fef1818e9 +size 16389 diff --git a/joint_model/checkpoint-279/scheduler.pt b/joint_model/checkpoint-279/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..61cc97489bd3cee12969e640db060314c476efdf --- /dev/null +++ b/joint_model/checkpoint-279/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e695e4dcb8ae17c008abc10305f554d626be890e5a78b9ec4e0637ec6fa3f51 +size 1465 diff --git a/joint_model/checkpoint-279/sentencepiece.bpe.model b/joint_model/checkpoint-279/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-279/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-279/special_tokens_map.json b/joint_model/checkpoint-279/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-279/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-279/tokenizer.json b/joint_model/checkpoint-279/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-279/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-279/tokenizer_config.json b/joint_model/checkpoint-279/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-279/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-279/trainer_state.json b/joint_model/checkpoint-279/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f1ad0eb3de119d403f6222165ca65ba449b4eb9b --- /dev/null +++ b/joint_model/checkpoint-279/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_global_step": 279, + "best_metric": 1.1101479530334473, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-279", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 279, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7345146232307712.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-279/training_args.bin b/joint_model/checkpoint-279/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-279/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-2790/config.json b/joint_model/checkpoint-2790/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-2790/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-2790/model.safetensors b/joint_model/checkpoint-2790/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ded8ab3f80c26714783badc9aa73556f9408638f --- /dev/null +++ b/joint_model/checkpoint-2790/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6eb1ccd12e907c8ebc1e8e6300463ba2c384ef7a9f8447a13f02724a1751641 +size 1112408092 diff --git a/joint_model/checkpoint-2790/optimizer.pt b/joint_model/checkpoint-2790/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..be4228bb8cbfb0055ff384a64b1809c199690316 --- /dev/null +++ b/joint_model/checkpoint-2790/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7eb93eeaa3a053fd7b9bb5b84923284c3da9dc559d8b48604005d7dba36326e +size 2224937355 diff --git a/joint_model/checkpoint-2790/rng_state_0.pth b/joint_model/checkpoint-2790/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..1204dfcda519e30b1f0934f725519038ec1ffe46 --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3694df1592907bff7db49c1f38a514da39a18474d33f5396292f06b8881baf1b +size 16389 diff --git a/joint_model/checkpoint-2790/rng_state_1.pth b/joint_model/checkpoint-2790/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..a04284e4bf2d0f7b5651fec53a0a577d2697bc57 --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032463486955ad1b697dd64126b425c51f84df9903692266c4b4ceee9cdc6e66 +size 16389 diff --git a/joint_model/checkpoint-2790/rng_state_2.pth b/joint_model/checkpoint-2790/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..efa00c6512f2c0b8f45bc807becc365234c6c771 --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fc38a0ae9b600f2035ab0f6ffaed56380fd22b3e8ab03879c99168b6eeec9d +size 16389 diff --git a/joint_model/checkpoint-2790/rng_state_3.pth b/joint_model/checkpoint-2790/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8397a60e26b1948b1216b50242887961847bbdd --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fd45f971330bec68c9f5feef2f4ec9980d96e954f2be27a8fe7e2b0d902bfc +size 16389 diff --git a/joint_model/checkpoint-2790/rng_state_4.pth b/joint_model/checkpoint-2790/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..93d834d206168b209cab778034ba97d5c59b3ad1 --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d869459caa121b5225ac1120ee164121cb7dca72fe321f04cba29f30a74c7a11 +size 16389 diff --git a/joint_model/checkpoint-2790/rng_state_5.pth b/joint_model/checkpoint-2790/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..e85533f3144b4263ba9c24f896f396170cc5d832 --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e39fe9af551dce334937d68faa12a89caef0ed8ad87618af6cf8791ff41b055 +size 16389 diff --git a/joint_model/checkpoint-2790/rng_state_6.pth b/joint_model/checkpoint-2790/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..175cf3b8132a2fc63f434890fbe337add7e0046a --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c9effc7e010cd6483f53db0c185548d596482ec43106c90d3b51e5f407c292a +size 16389 diff --git a/joint_model/checkpoint-2790/rng_state_7.pth b/joint_model/checkpoint-2790/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e804207248e61126d283b205e1b1359d910ee44 --- /dev/null +++ b/joint_model/checkpoint-2790/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb317d00046b48dc3d3553fb624dd4664da7ce4e3604b361d470b1f3ed65e9b +size 16389 diff --git a/joint_model/checkpoint-2790/scheduler.pt b/joint_model/checkpoint-2790/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..044166dc5cd12a1934def5f8cf057ba8f6b56df7 --- /dev/null +++ b/joint_model/checkpoint-2790/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccef646684698ad189a8d80ef16f815d48dcaa966c52dafed533b1d6a2c27964 +size 1465 diff --git a/joint_model/checkpoint-2790/sentencepiece.bpe.model b/joint_model/checkpoint-2790/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-2790/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-2790/special_tokens_map.json b/joint_model/checkpoint-2790/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-2790/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-2790/tokenizer.json b/joint_model/checkpoint-2790/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-2790/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-2790/tokenizer_config.json b/joint_model/checkpoint-2790/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-2790/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-2790/trainer_state.json b/joint_model/checkpoint-2790/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bf8712318298f4048b5872da342f77355711091c --- /dev/null +++ b/joint_model/checkpoint-2790/trainer_state.json @@ -0,0 +1,189 @@ +{ + "best_global_step": 2790, + "best_metric": 0.19661813974380493, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-2790", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 2790, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + }, + { + "epoch": 3.5842293906810037, + "grad_norm": 9.112466812133789, + "learning_rate": 1.2838709677419356e-05, + "loss": 0.5009, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_accuracy_level1": 0.976, + "eval_accuracy_level2": 0.948, + "eval_f1_level1": 0.9759892345948809, + "eval_f1_level2": 0.944852130743217, + "eval_loss": 0.3113965690135956, + "eval_runtime": 0.7752, + "eval_samples_per_second": 6449.701, + "eval_steps_per_second": 12.899, + "step": 1116 + }, + { + "epoch": 5.0, + "eval_accuracy_level1": 0.9786, + "eval_accuracy_level2": 0.9574, + "eval_f1_level1": 0.9786086703124734, + "eval_f1_level2": 0.9558414971682437, + "eval_loss": 0.2682338356971741, + "eval_runtime": 0.7265, + "eval_samples_per_second": 6882.558, + "eval_steps_per_second": 13.765, + "step": 1395 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 5.6989006996154785, + "learning_rate": 9.254480286738352e-06, + "loss": 0.2818, + "step": 1500 + }, + { + "epoch": 6.0, + "eval_accuracy_level1": 0.9804, + "eval_accuracy_level2": 0.9642, + "eval_f1_level1": 0.9803982577567285, + "eval_f1_level2": 0.9638909785156317, + "eval_loss": 0.2282806634902954, + "eval_runtime": 0.5242, + "eval_samples_per_second": 9538.102, + "eval_steps_per_second": 19.076, + "step": 1674 + }, + { + "epoch": 7.0, + "eval_accuracy_level1": 0.9824, + "eval_accuracy_level2": 0.9664, + "eval_f1_level1": 0.9823983306464108, + "eval_f1_level2": 0.966154693269809, + "eval_loss": 0.2116653025150299, + "eval_runtime": 0.4889, + "eval_samples_per_second": 10226.528, + "eval_steps_per_second": 20.453, + "step": 1953 + }, + { + "epoch": 7.168458781362007, + "grad_norm": 7.060131072998047, + "learning_rate": 5.670250896057348e-06, + "loss": 0.1994, + "step": 2000 + }, + { + "epoch": 8.0, + "eval_accuracy_level1": 0.9826, + "eval_accuracy_level2": 0.969, + "eval_f1_level1": 0.9826127380767093, + "eval_f1_level2": 0.9688130224440087, + "eval_loss": 0.20548628270626068, + "eval_runtime": 0.5897, + "eval_samples_per_second": 8478.651, + "eval_steps_per_second": 16.957, + "step": 2232 + }, + { + "epoch": 8.960573476702509, + "grad_norm": 9.435193061828613, + "learning_rate": 2.086021505376344e-06, + "loss": 0.1544, + "step": 2500 + }, + { + "epoch": 9.0, + "eval_accuracy_level1": 0.9836, + "eval_accuracy_level2": 0.9706, + "eval_f1_level1": 0.9836193977032266, + "eval_f1_level2": 0.9704863338256707, + "eval_loss": 0.20010386407375336, + "eval_runtime": 0.7263, + "eval_samples_per_second": 6884.526, + "eval_steps_per_second": 13.769, + "step": 2511 + }, + { + "epoch": 10.0, + "eval_accuracy_level1": 0.984, + "eval_accuracy_level2": 0.971, + "eval_f1_level1": 0.9840142363029732, + "eval_f1_level2": 0.970881189240863, + "eval_loss": 0.19661813974380493, + "eval_runtime": 0.5736, + "eval_samples_per_second": 8717.27, + "eval_steps_per_second": 17.435, + "step": 2790 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 7.345146289350246e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-2790/training_args.bin b/joint_model/checkpoint-2790/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-2790/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-558/config.json b/joint_model/checkpoint-558/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-558/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-558/model.safetensors b/joint_model/checkpoint-558/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ac3fae878321eb6c2e73acd4af336f6a8ae9f34 --- /dev/null +++ b/joint_model/checkpoint-558/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0e78392ee14b5f51cac6627a6b55a68afe53ed1472facda429a0fd922ffe4e +size 1112408092 diff --git a/joint_model/checkpoint-558/optimizer.pt b/joint_model/checkpoint-558/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b1597e69574355e591c3d4c155fc41c304adcb3 --- /dev/null +++ b/joint_model/checkpoint-558/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee05b5923bc1138156211fc9d285635ae0cdea24efd627b57f2e109bea903b4 +size 2224937355 diff --git a/joint_model/checkpoint-558/rng_state_0.pth b/joint_model/checkpoint-558/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..22c59c271aa91ae40a13d484e95c77d6344e7611 --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0b08ac33f223cec939ef2a82779f76df73c1b3070d2185aea1d15f26b285ed +size 16389 diff --git a/joint_model/checkpoint-558/rng_state_1.pth b/joint_model/checkpoint-558/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b77031dcb9050662cd5baa3d1c3b3fdfb7fa486 --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf754a07f7208d1b5249210376c29c1d32533c690c3984eadc45be2abd83432 +size 16389 diff --git a/joint_model/checkpoint-558/rng_state_2.pth b/joint_model/checkpoint-558/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..878eaa18d13aea84b790df0a4fb92cee59696d78 --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f47fc93c047de8cd7a54bb1da5290b59ab7545af86022265dc627b49a60554 +size 16389 diff --git a/joint_model/checkpoint-558/rng_state_3.pth b/joint_model/checkpoint-558/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5ef7f1850b8436e1ee6c604ec9a52b84ed26db2 --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c81c63dde2d7353453ff13fdc65d31a1ed80f49baa476e2717a451ef4fa726 +size 16389 diff --git a/joint_model/checkpoint-558/rng_state_4.pth b/joint_model/checkpoint-558/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..72d9112d5adf4ecceaaf09664e3195cbae361d4c --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f756dc8c9c847dba6331631513be6924dcad79409766453e3e75280a0595da +size 16389 diff --git a/joint_model/checkpoint-558/rng_state_5.pth b/joint_model/checkpoint-558/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..18945a9838866eca9d6ae2cc1bd6ca36b617f46c --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea7a79383e8430e4e030fafe149a75c93dc40e69ef9d62e24c9e322b363a2f3 +size 16389 diff --git a/joint_model/checkpoint-558/rng_state_6.pth b/joint_model/checkpoint-558/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..6249af6dae34db55690edbef55d64dd18ed44a25 --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c85a6aa79aeb28d3314cb33b6346ef96c4aac933110943815f9121824fb93ef +size 16389 diff --git a/joint_model/checkpoint-558/rng_state_7.pth b/joint_model/checkpoint-558/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..faf7f8cc91d1f5e285bbe8e38a4f053508028bab --- /dev/null +++ b/joint_model/checkpoint-558/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c878f39a05836b9436f33f1cf9302acb4524421a6f086e59035ec929f79611 +size 16389 diff --git a/joint_model/checkpoint-558/scheduler.pt b/joint_model/checkpoint-558/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d6621ee56f7420b2390c6b22868c81dacca16ae --- /dev/null +++ b/joint_model/checkpoint-558/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af96adfd67e9829e5edf51801a1d6a079e91c6a6adf44d42b4c42ae2206d71d +size 1465 diff --git a/joint_model/checkpoint-558/sentencepiece.bpe.model b/joint_model/checkpoint-558/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-558/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-558/special_tokens_map.json b/joint_model/checkpoint-558/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-558/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-558/tokenizer.json b/joint_model/checkpoint-558/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-558/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-558/tokenizer_config.json b/joint_model/checkpoint-558/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-558/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-558/trainer_state.json b/joint_model/checkpoint-558/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a70d6e3e14b4b8a1943d01f6921ff9b6bd9fb8b3 --- /dev/null +++ b/joint_model/checkpoint-558/trainer_state.json @@ -0,0 +1,65 @@ +{ + "best_global_step": 558, + "best_metric": 0.5648184418678284, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-558", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 558, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4690292598833152e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-558/training_args.bin b/joint_model/checkpoint-558/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-558/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/checkpoint-837/config.json b/joint_model/checkpoint-837/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7f388db7884f95e8121c55c1f8ed947a009e9e --- /dev/null +++ b/joint_model/checkpoint-837/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "HierarchicalXLMRoberta" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "transformers_version": "4.56.1", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/joint_model/checkpoint-837/model.safetensors b/joint_model/checkpoint-837/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0378493d049e5ca19242f9865e84cfb412551575 --- /dev/null +++ b/joint_model/checkpoint-837/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbdc9576192f76fda5ff7073f488a88a3c707f4e196b71004fe8720f111c0c97 +size 1112408092 diff --git a/joint_model/checkpoint-837/optimizer.pt b/joint_model/checkpoint-837/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5819112e53f8f47014887290262d41893bd646b6 --- /dev/null +++ b/joint_model/checkpoint-837/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a402a284354466004a224d1ff4886b60b44fd578857ae3ac217661f2961e435 +size 2224937355 diff --git a/joint_model/checkpoint-837/rng_state_0.pth b/joint_model/checkpoint-837/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..455f5c62f124af2602e8106dee685fd51e64e289 --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8174d893cf29edfa4a6b3ac809dcc6a14286fc178cc7c02ad6cf43d0c9f0627a +size 16389 diff --git a/joint_model/checkpoint-837/rng_state_1.pth b/joint_model/checkpoint-837/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..578380829a7e0f40e0d17e36ae4cb750867f4c76 --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d92dda4b058d54309f7b8865b9b8bb771992c275e064d8d0a2e2878cb6c48f6d +size 16389 diff --git a/joint_model/checkpoint-837/rng_state_2.pth b/joint_model/checkpoint-837/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..101160d1586d76a59538897543f72c3a33c08339 --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36cc81c51fd8f3bd067fa3778ae226b48e70a0415d700f6b6a3ac320f64adef +size 16389 diff --git a/joint_model/checkpoint-837/rng_state_3.pth b/joint_model/checkpoint-837/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..53194b3efbb3392a51eff68f23e37b910265d1c2 --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8c9f4c96b25a679255a8a7863b55a726bcc322bda28bbb47d831c9d4e8d256f +size 16389 diff --git a/joint_model/checkpoint-837/rng_state_4.pth b/joint_model/checkpoint-837/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..733a394f80b18de6d2fa66b537b0c3c3857ca822 --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b351bd966dcca1cdb1afc30df6abd6eed5b37d1aef9513f4d4fb3ed55ba30d77 +size 16389 diff --git a/joint_model/checkpoint-837/rng_state_5.pth b/joint_model/checkpoint-837/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..26d4d91fc64649e60cdc752712ced3108fd2e884 --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fde6d99daa5ee5f16382eff0274f36d4a3e66d2a19c4d93ac2583ec5d97145e +size 16389 diff --git a/joint_model/checkpoint-837/rng_state_6.pth b/joint_model/checkpoint-837/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..7dfb63f85419cd84c0be2cfc4a7c7dcd87dbe9d0 --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894daeb25623ff3f610f6dab0e5ad44aa5d790b0644a685341e951902321c585 +size 16389 diff --git a/joint_model/checkpoint-837/rng_state_7.pth b/joint_model/checkpoint-837/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..f2b2d6e881524d6ac5bd96e4c9bc13109a8f820f --- /dev/null +++ b/joint_model/checkpoint-837/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9775aa1d74b9dd14e2c4b03adab017c7bce57f30059be797f865e39d506c1fe4 +size 16389 diff --git a/joint_model/checkpoint-837/scheduler.pt b/joint_model/checkpoint-837/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fd22075828107167117ca734c13aae6730d3923 --- /dev/null +++ b/joint_model/checkpoint-837/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad9651133913dbf6b81c34375b7481b50a2618f44ce0ab4329fe73988a369a5d +size 1465 diff --git a/joint_model/checkpoint-837/sentencepiece.bpe.model b/joint_model/checkpoint-837/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/joint_model/checkpoint-837/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/joint_model/checkpoint-837/special_tokens_map.json b/joint_model/checkpoint-837/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/joint_model/checkpoint-837/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/joint_model/checkpoint-837/tokenizer.json b/joint_model/checkpoint-837/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7126b1d228f6e5f725b6ee02a88b1118c725aeed --- /dev/null +++ b/joint_model/checkpoint-837/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20 +size 17082734 diff --git a/joint_model/checkpoint-837/tokenizer_config.json b/joint_model/checkpoint-837/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a974a0731aa55a21137e33205c0d71e03d4e24 --- /dev/null +++ b/joint_model/checkpoint-837/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/joint_model/checkpoint-837/trainer_state.json b/joint_model/checkpoint-837/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a21c1209e58f4e6482c896f428aeb016349350d --- /dev/null +++ b/joint_model/checkpoint-837/trainer_state.json @@ -0,0 +1,77 @@ +{ + "best_global_step": 837, + "best_metric": 0.3886409401893616, + "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-837", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 837, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy_level1": 0.943, + "eval_accuracy_level2": 0.8282, + "eval_f1_level1": 0.9424784900051851, + "eval_f1_level2": 0.7894811362618394, + "eval_loss": 1.1101479530334473, + "eval_runtime": 0.6677, + "eval_samples_per_second": 7488.47, + "eval_steps_per_second": 14.977, + "step": 279 + }, + { + "epoch": 1.7921146953405018, + "grad_norm": 8.197423934936523, + "learning_rate": 1.642293906810036e-05, + "loss": 2.029, + "step": 500 + }, + { + "epoch": 2.0, + "eval_accuracy_level1": 0.963, + "eval_accuracy_level2": 0.9134, + "eval_f1_level1": 0.962976281751424, + "eval_f1_level2": 0.9010101771001547, + "eval_loss": 0.5648184418678284, + "eval_runtime": 0.6225, + "eval_samples_per_second": 8032.157, + "eval_steps_per_second": 16.064, + "step": 558 + }, + { + "epoch": 3.0, + "eval_accuracy_level1": 0.9684, + "eval_accuracy_level2": 0.9404, + "eval_f1_level1": 0.9683869415305786, + "eval_f1_level2": 0.9353991249189201, + "eval_loss": 0.3886409401893616, + "eval_runtime": 0.7756, + "eval_samples_per_second": 6447.003, + "eval_steps_per_second": 12.894, + "step": 837 + } + ], + "logging_steps": 500, + "max_steps": 2790, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.2035438831140864e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/joint_model/checkpoint-837/training_args.bin b/joint_model/checkpoint-837/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cc7b0d4a0500b522ed8682786e4d1dc3312f --- /dev/null +++ b/joint_model/checkpoint-837/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18 +size 5777 diff --git a/joint_model/runs/Sep27_08-45-36_hanoi/events.out.tfevents.1758962744.hanoi.2933948.0 b/joint_model/runs/Sep27_08-45-36_hanoi/events.out.tfevents.1758962744.hanoi.2933948.0 new file mode 100644 index 0000000000000000000000000000000000000000..78c7da2cee63e4d18fb70d7316c234acee440466 --- /dev/null +++ b/joint_model/runs/Sep27_08-45-36_hanoi/events.out.tfevents.1758962744.hanoi.2933948.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74dcc120d2317d5cdfbd2efe48a898320448ae22588348568c207ec24d601ab2 +size 4999 diff --git a/joint_model/runs/Sep27_08-48-29_hanoi/events.out.tfevents.1758962913.hanoi.2934734.0 b/joint_model/runs/Sep27_08-48-29_hanoi/events.out.tfevents.1758962913.hanoi.2934734.0 new file mode 100644 index 0000000000000000000000000000000000000000..2c6c07b53550ff7dc2d7109a8fba47c8044dcf64 --- /dev/null +++ b/joint_model/runs/Sep27_08-48-29_hanoi/events.out.tfevents.1758962913.hanoi.2934734.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edc8384af7bb48cff1e97e63340883da7f4f40e2e6f539e9eb5b5a1d30393a9 +size 40 diff --git a/joint_model/runs/Sep27_08-50-33_hanoi/events.out.tfevents.1758963043.hanoi.2936293.0 b/joint_model/runs/Sep27_08-50-33_hanoi/events.out.tfevents.1758963043.hanoi.2936293.0 new file mode 100644 index 0000000000000000000000000000000000000000..26916da1d4166a009305e796dc731fcd5187b301 --- /dev/null +++ b/joint_model/runs/Sep27_08-50-33_hanoi/events.out.tfevents.1758963043.hanoi.2936293.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622c63da7ca0df4b4192de7cbdf8c8dbb299daf82bf067ed95da90a6429dc32b +size 11345 diff --git a/joint_model/runs/Sep27_16-43-47_hanoi/events.out.tfevents.1758991441.hanoi.2994230.0 b/joint_model/runs/Sep27_16-43-47_hanoi/events.out.tfevents.1758991441.hanoi.2994230.0 new file mode 100644 index 0000000000000000000000000000000000000000..510210de93267ba6991236bfa9600c8e5353a7d5 --- /dev/null +++ b/joint_model/runs/Sep27_16-43-47_hanoi/events.out.tfevents.1758991441.hanoi.2994230.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d45a0f0206b97817cef72b7b4a4e2ae36b869297b98c937a095819076206e5 +size 461 diff --git a/joint_model/runs/Sep27_16-44-42_hanoi/events.out.tfevents.1758991495.hanoi.2995062.0 b/joint_model/runs/Sep27_16-44-42_hanoi/events.out.tfevents.1758991495.hanoi.2995062.0 new file mode 100644 index 0000000000000000000000000000000000000000..32f4d58f82a764cf992d089bb11ecd0d210ff7df --- /dev/null +++ b/joint_model/runs/Sep27_16-44-42_hanoi/events.out.tfevents.1758991495.hanoi.2995062.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:988cc3d7efa4aba95f325a2a3f2608959ae5caac59028d608e210960886a2478 +size 461