anhtunguyen98 commited on
Commit
176c816
·
verified ·
1 Parent(s): 382f7eb

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +10 -0
  2. joint_model/checkpoint-1116/config.json +27 -0
  3. joint_model/checkpoint-1116/model.safetensors +3 -0
  4. joint_model/checkpoint-1116/optimizer.pt +3 -0
  5. joint_model/checkpoint-1116/rng_state_0.pth +3 -0
  6. joint_model/checkpoint-1116/rng_state_1.pth +3 -0
  7. joint_model/checkpoint-1116/rng_state_2.pth +3 -0
  8. joint_model/checkpoint-1116/rng_state_3.pth +3 -0
  9. joint_model/checkpoint-1116/rng_state_4.pth +3 -0
  10. joint_model/checkpoint-1116/rng_state_5.pth +3 -0
  11. joint_model/checkpoint-1116/rng_state_6.pth +3 -0
  12. joint_model/checkpoint-1116/rng_state_7.pth +3 -0
  13. joint_model/checkpoint-1116/scheduler.pt +3 -0
  14. joint_model/checkpoint-1116/sentencepiece.bpe.model +3 -0
  15. joint_model/checkpoint-1116/special_tokens_map.json +15 -0
  16. joint_model/checkpoint-1116/tokenizer.json +3 -0
  17. joint_model/checkpoint-1116/tokenizer_config.json +55 -0
  18. joint_model/checkpoint-1116/trainer_state.json +96 -0
  19. joint_model/checkpoint-1116/training_args.bin +3 -0
  20. joint_model/checkpoint-1395/config.json +27 -0
  21. joint_model/checkpoint-1395/model.safetensors +3 -0
  22. joint_model/checkpoint-1395/optimizer.pt +3 -0
  23. joint_model/checkpoint-1395/rng_state_0.pth +3 -0
  24. joint_model/checkpoint-1395/rng_state_1.pth +3 -0
  25. joint_model/checkpoint-1395/rng_state_2.pth +3 -0
  26. joint_model/checkpoint-1395/rng_state_3.pth +3 -0
  27. joint_model/checkpoint-1395/rng_state_4.pth +3 -0
  28. joint_model/checkpoint-1395/rng_state_5.pth +3 -0
  29. joint_model/checkpoint-1395/rng_state_6.pth +3 -0
  30. joint_model/checkpoint-1395/rng_state_7.pth +3 -0
  31. joint_model/checkpoint-1395/scheduler.pt +3 -0
  32. joint_model/checkpoint-1395/sentencepiece.bpe.model +3 -0
  33. joint_model/checkpoint-1395/special_tokens_map.json +15 -0
  34. joint_model/checkpoint-1395/tokenizer.json +3 -0
  35. joint_model/checkpoint-1395/tokenizer_config.json +55 -0
  36. joint_model/checkpoint-1395/trainer_state.json +108 -0
  37. joint_model/checkpoint-1395/training_args.bin +3 -0
  38. joint_model/checkpoint-1674/config.json +27 -0
  39. joint_model/checkpoint-1674/model.safetensors +3 -0
  40. joint_model/checkpoint-1674/optimizer.pt +3 -0
  41. joint_model/checkpoint-1674/rng_state_0.pth +3 -0
  42. joint_model/checkpoint-1674/rng_state_1.pth +3 -0
  43. joint_model/checkpoint-1674/rng_state_2.pth +3 -0
  44. joint_model/checkpoint-1674/rng_state_3.pth +3 -0
  45. joint_model/checkpoint-1674/rng_state_4.pth +3 -0
  46. joint_model/checkpoint-1674/rng_state_5.pth +3 -0
  47. joint_model/checkpoint-1674/rng_state_6.pth +3 -0
  48. joint_model/checkpoint-1674/rng_state_7.pth +3 -0
  49. joint_model/checkpoint-1674/scheduler.pt +3 -0
  50. joint_model/checkpoint-1674/sentencepiece.bpe.model +3 -0
.gitattributes CHANGED
@@ -38,3 +38,13 @@ xlm/hierarchical_xlm_roberta/checkpoint-2688/tokenizer.json filter=lfs diff=lfs
38
  xlm/hierarchical_xlm_roberta/checkpoint-3584/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
  xlm/hierarchical_xlm_roberta/checkpoint-4480/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
  xlm/hierarchical_xlm_roberta/checkpoint-896/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
38
  xlm/hierarchical_xlm_roberta/checkpoint-3584/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
  xlm/hierarchical_xlm_roberta/checkpoint-4480/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
  xlm/hierarchical_xlm_roberta/checkpoint-896/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ joint_model/checkpoint-1116/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ joint_model/checkpoint-1395/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ joint_model/checkpoint-1674/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
+ joint_model/checkpoint-1953/tokenizer.json filter=lfs diff=lfs merge=lfs -text
45
+ joint_model/checkpoint-2232/tokenizer.json filter=lfs diff=lfs merge=lfs -text
46
+ joint_model/checkpoint-2511/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
+ joint_model/checkpoint-279/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
+ joint_model/checkpoint-2790/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ joint_model/checkpoint-558/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
+ joint_model/checkpoint-837/tokenizer.json filter=lfs diff=lfs merge=lfs -text
joint_model/checkpoint-1116/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "HierarchicalXLMRoberta"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "transformers_version": "4.56.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
joint_model/checkpoint-1116/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6792b2f6f167bf98323526de92da30fc30176242f8e06b2af08a1f76528af6d2
3
+ size 1112408092
joint_model/checkpoint-1116/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09fab47c4a460fe4a9c21057cf29f5557d19a2d723b628bc0898a267dadaa022
3
+ size 2224937355
joint_model/checkpoint-1116/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c2ca6cda30f0e67f92af2785341362f9fd75975cfa0e1e3edf170b31356982d
3
+ size 16389
joint_model/checkpoint-1116/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e19c421d330efe795fe888269f2979bdecc155b8ec1bae695ab646f830eda58
3
+ size 16389
joint_model/checkpoint-1116/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4bfe1bdb6ffcc11de3dfa36f4f902a025948dd1a9d55f9e2a0c37a0c71d8993
3
+ size 16389
joint_model/checkpoint-1116/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7db4f6f2db3a4f2891f0c6123c6790aa8182a24731540060e1c5162bdfcf332
3
+ size 16389
joint_model/checkpoint-1116/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e325614869957231c07b72ed17ee0c0dec786269ff93abf17b16883c45761f74
3
+ size 16389
joint_model/checkpoint-1116/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f06f2380c4f07e128133b104b687744df3b4064ecc4b332886d237fcca20b844
3
+ size 16389
joint_model/checkpoint-1116/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6806143edb28cfaf20fbcd82eda60303d3570b7414ff8996f022cf2b33990496
3
+ size 16389
joint_model/checkpoint-1116/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd5cdf368ea3798bd2e333443bb2d4214053bef95ce8af4541fba9630a6adda4
3
+ size 16389
joint_model/checkpoint-1116/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d771b00dd64d2f869efc678cde233c82e3d5d80f32fc0afc37d60c16f5276a7
3
+ size 1465
joint_model/checkpoint-1116/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
joint_model/checkpoint-1116/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
joint_model/checkpoint-1116/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
3
+ size 17082734
joint_model/checkpoint-1116/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
joint_model/checkpoint-1116/trainer_state.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1116,
3
+ "best_metric": 0.3113965690135956,
4
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1116",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1116,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy_level1": 0.943,
15
+ "eval_accuracy_level2": 0.8282,
16
+ "eval_f1_level1": 0.9424784900051851,
17
+ "eval_f1_level2": 0.7894811362618394,
18
+ "eval_loss": 1.1101479530334473,
19
+ "eval_runtime": 0.6677,
20
+ "eval_samples_per_second": 7488.47,
21
+ "eval_steps_per_second": 14.977,
22
+ "step": 279
23
+ },
24
+ {
25
+ "epoch": 1.7921146953405018,
26
+ "grad_norm": 8.197423934936523,
27
+ "learning_rate": 1.642293906810036e-05,
28
+ "loss": 2.029,
29
+ "step": 500
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_accuracy_level1": 0.963,
34
+ "eval_accuracy_level2": 0.9134,
35
+ "eval_f1_level1": 0.962976281751424,
36
+ "eval_f1_level2": 0.9010101771001547,
37
+ "eval_loss": 0.5648184418678284,
38
+ "eval_runtime": 0.6225,
39
+ "eval_samples_per_second": 8032.157,
40
+ "eval_steps_per_second": 16.064,
41
+ "step": 558
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "eval_accuracy_level1": 0.9684,
46
+ "eval_accuracy_level2": 0.9404,
47
+ "eval_f1_level1": 0.9683869415305786,
48
+ "eval_f1_level2": 0.9353991249189201,
49
+ "eval_loss": 0.3886409401893616,
50
+ "eval_runtime": 0.7756,
51
+ "eval_samples_per_second": 6447.003,
52
+ "eval_steps_per_second": 12.894,
53
+ "step": 837
54
+ },
55
+ {
56
+ "epoch": 3.5842293906810037,
57
+ "grad_norm": 9.112466812133789,
58
+ "learning_rate": 1.2838709677419356e-05,
59
+ "loss": 0.5009,
60
+ "step": 1000
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "eval_accuracy_level1": 0.976,
65
+ "eval_accuracy_level2": 0.948,
66
+ "eval_f1_level1": 0.9759892345948809,
67
+ "eval_f1_level2": 0.944852130743217,
68
+ "eval_loss": 0.3113965690135956,
69
+ "eval_runtime": 0.7752,
70
+ "eval_samples_per_second": 6449.701,
71
+ "eval_steps_per_second": 12.899,
72
+ "step": 1116
73
+ }
74
+ ],
75
+ "logging_steps": 500,
76
+ "max_steps": 2790,
77
+ "num_input_tokens_seen": 0,
78
+ "num_train_epochs": 10,
79
+ "save_steps": 500,
80
+ "stateful_callbacks": {
81
+ "TrainerControl": {
82
+ "args": {
83
+ "should_epoch_stop": false,
84
+ "should_evaluate": false,
85
+ "should_log": false,
86
+ "should_save": true,
87
+ "should_training_stop": false
88
+ },
89
+ "attributes": {}
90
+ }
91
+ },
92
+ "total_flos": 2.9380585063448576e+16,
93
+ "train_batch_size": 64,
94
+ "trial_name": null,
95
+ "trial_params": null
96
+ }
joint_model/checkpoint-1116/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
3
+ size 5777
joint_model/checkpoint-1395/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "HierarchicalXLMRoberta"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "transformers_version": "4.56.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
joint_model/checkpoint-1395/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41e0216d9d3862040536a7fd9ba6b254a014777b0d7f1662f492db7a971f406
3
+ size 1112408092
joint_model/checkpoint-1395/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5607b6daf0f0a370ccef9270477c2e9a830246c8af0dd0c7fe8549c739d2e9ac
3
+ size 2224937355
joint_model/checkpoint-1395/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:372eabe9ee1b698bd155ed64252bb52ecd85f363df08d4a6c4512f0e000cb9b7
3
+ size 16389
joint_model/checkpoint-1395/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d00e489391354b35849c16d9fa756e7012f5711d57ab1683f71c55ef187b9dd2
3
+ size 16389
joint_model/checkpoint-1395/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0cf5772222d7ef83bf5bfa008bd16ddb5100e0b47d95a97be765c501e4e3cc
3
+ size 16389
joint_model/checkpoint-1395/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:622b3cd66dcaf83e587a5e0c31a6797932cb588d1367575466e0b8eac0b6b732
3
+ size 16389
joint_model/checkpoint-1395/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dfb97eabb2e40d9e6d49009ad4b964b2c73fa42fbf32c8b1f59ae56bf87d92e
3
+ size 16389
joint_model/checkpoint-1395/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c2e0af24431f9c87afbbee452b8c4cb68e55978cc475aca99862285217c6f8a
3
+ size 16389
joint_model/checkpoint-1395/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c209482d19626584d72a324c23675cfbc298544cbd62b33073d59f67aa1d16e
3
+ size 16389
joint_model/checkpoint-1395/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1547a4b17eaa25162d8c2232659379d5dc26ac63c151280e984e29b60718d591
3
+ size 16389
joint_model/checkpoint-1395/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f9f1682436aaf9d161d72dc7e582154d9fbe0f817b46fd9cdc83dc2d8f4ef8f
3
+ size 1465
joint_model/checkpoint-1395/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
joint_model/checkpoint-1395/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
joint_model/checkpoint-1395/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
3
+ size 17082734
joint_model/checkpoint-1395/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
joint_model/checkpoint-1395/trainer_state.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1395,
3
+ "best_metric": 0.2682338356971741,
4
+ "best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1395",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1395,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy_level1": 0.943,
15
+ "eval_accuracy_level2": 0.8282,
16
+ "eval_f1_level1": 0.9424784900051851,
17
+ "eval_f1_level2": 0.7894811362618394,
18
+ "eval_loss": 1.1101479530334473,
19
+ "eval_runtime": 0.6677,
20
+ "eval_samples_per_second": 7488.47,
21
+ "eval_steps_per_second": 14.977,
22
+ "step": 279
23
+ },
24
+ {
25
+ "epoch": 1.7921146953405018,
26
+ "grad_norm": 8.197423934936523,
27
+ "learning_rate": 1.642293906810036e-05,
28
+ "loss": 2.029,
29
+ "step": 500
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_accuracy_level1": 0.963,
34
+ "eval_accuracy_level2": 0.9134,
35
+ "eval_f1_level1": 0.962976281751424,
36
+ "eval_f1_level2": 0.9010101771001547,
37
+ "eval_loss": 0.5648184418678284,
38
+ "eval_runtime": 0.6225,
39
+ "eval_samples_per_second": 8032.157,
40
+ "eval_steps_per_second": 16.064,
41
+ "step": 558
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "eval_accuracy_level1": 0.9684,
46
+ "eval_accuracy_level2": 0.9404,
47
+ "eval_f1_level1": 0.9683869415305786,
48
+ "eval_f1_level2": 0.9353991249189201,
49
+ "eval_loss": 0.3886409401893616,
50
+ "eval_runtime": 0.7756,
51
+ "eval_samples_per_second": 6447.003,
52
+ "eval_steps_per_second": 12.894,
53
+ "step": 837
54
+ },
55
+ {
56
+ "epoch": 3.5842293906810037,
57
+ "grad_norm": 9.112466812133789,
58
+ "learning_rate": 1.2838709677419356e-05,
59
+ "loss": 0.5009,
60
+ "step": 1000
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "eval_accuracy_level1": 0.976,
65
+ "eval_accuracy_level2": 0.948,
66
+ "eval_f1_level1": 0.9759892345948809,
67
+ "eval_f1_level2": 0.944852130743217,
68
+ "eval_loss": 0.3113965690135956,
69
+ "eval_runtime": 0.7752,
70
+ "eval_samples_per_second": 6449.701,
71
+ "eval_steps_per_second": 12.899,
72
+ "step": 1116
73
+ },
74
+ {
75
+ "epoch": 5.0,
76
+ "eval_accuracy_level1": 0.9786,
77
+ "eval_accuracy_level2": 0.9574,
78
+ "eval_f1_level1": 0.9786086703124734,
79
+ "eval_f1_level2": 0.9558414971682437,
80
+ "eval_loss": 0.2682338356971741,
81
+ "eval_runtime": 0.7265,
82
+ "eval_samples_per_second": 6882.558,
83
+ "eval_steps_per_second": 13.765,
84
+ "step": 1395
85
+ }
86
+ ],
87
+ "logging_steps": 500,
88
+ "max_steps": 2790,
89
+ "num_input_tokens_seen": 0,
90
+ "num_train_epochs": 10,
91
+ "save_steps": 500,
92
+ "stateful_callbacks": {
93
+ "TrainerControl": {
94
+ "args": {
95
+ "should_epoch_stop": false,
96
+ "should_evaluate": false,
97
+ "should_log": false,
98
+ "should_save": true,
99
+ "should_training_stop": false
100
+ },
101
+ "attributes": {}
102
+ }
103
+ },
104
+ "total_flos": 3.672573129575629e+16,
105
+ "train_batch_size": 64,
106
+ "trial_name": null,
107
+ "trial_params": null
108
+ }
joint_model/checkpoint-1395/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
3
+ size 5777
joint_model/checkpoint-1674/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "HierarchicalXLMRoberta"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "transformers_version": "4.56.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
joint_model/checkpoint-1674/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07732aadf5ff31e4f11ced15e3b3f675dcec364a1c0b188e03180a6719bbc0bc
3
+ size 1112408092
joint_model/checkpoint-1674/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c772fa9b4c710c806b65a2db5c8842dd78cbf61f45a0b88ae50f5bedc42508d0
3
+ size 2224937355
joint_model/checkpoint-1674/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:244fd61bffb73015ca283aaf0b690244b4bf656c6c488f789153206cc5ca419c
3
+ size 16389
joint_model/checkpoint-1674/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85bf43502cd59c3e7002fdd28d84f2755d1d50b9f8395ece5cce57a33e6a2de
3
+ size 16389
joint_model/checkpoint-1674/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f36f28619c5d75933e40cf2ed1e4be028945b0f4185cc9e11476f543d2e8d7c
3
+ size 16389
joint_model/checkpoint-1674/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b838f4cf0f902e11d576f4b832468a92c12d05620e73c537a7302c86e09c2752
3
+ size 16389
joint_model/checkpoint-1674/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c946fcb7164ab82957cc2fd2229fec1ea962b6eb3608757ad9302a5956a5782
3
+ size 16389
joint_model/checkpoint-1674/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1498a3eb9fd6a568db26513c62c91ef104aa0da2637df120f18cbd8604a6fefc
3
+ size 16389
joint_model/checkpoint-1674/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f449c8299ada96be66c0e08d0603887c01bcef56d3f3c2bf63a0fc8a43664aa
3
+ size 16389
joint_model/checkpoint-1674/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a2a661a72b9f31cca93340f54a4c320474844987c784955cd6cf6c6f2d2cd65
3
+ size 16389
joint_model/checkpoint-1674/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e9460e5ee67dc8bfa729d93fdbc410d0dd92c60badc5c6eac4d11137e2d9011
3
+ size 1465
joint_model/checkpoint-1674/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051