Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +10 -0
- joint_model/checkpoint-1116/config.json +27 -0
- joint_model/checkpoint-1116/model.safetensors +3 -0
- joint_model/checkpoint-1116/optimizer.pt +3 -0
- joint_model/checkpoint-1116/rng_state_0.pth +3 -0
- joint_model/checkpoint-1116/rng_state_1.pth +3 -0
- joint_model/checkpoint-1116/rng_state_2.pth +3 -0
- joint_model/checkpoint-1116/rng_state_3.pth +3 -0
- joint_model/checkpoint-1116/rng_state_4.pth +3 -0
- joint_model/checkpoint-1116/rng_state_5.pth +3 -0
- joint_model/checkpoint-1116/rng_state_6.pth +3 -0
- joint_model/checkpoint-1116/rng_state_7.pth +3 -0
- joint_model/checkpoint-1116/scheduler.pt +3 -0
- joint_model/checkpoint-1116/sentencepiece.bpe.model +3 -0
- joint_model/checkpoint-1116/special_tokens_map.json +15 -0
- joint_model/checkpoint-1116/tokenizer.json +3 -0
- joint_model/checkpoint-1116/tokenizer_config.json +55 -0
- joint_model/checkpoint-1116/trainer_state.json +96 -0
- joint_model/checkpoint-1116/training_args.bin +3 -0
- joint_model/checkpoint-1395/config.json +27 -0
- joint_model/checkpoint-1395/model.safetensors +3 -0
- joint_model/checkpoint-1395/optimizer.pt +3 -0
- joint_model/checkpoint-1395/rng_state_0.pth +3 -0
- joint_model/checkpoint-1395/rng_state_1.pth +3 -0
- joint_model/checkpoint-1395/rng_state_2.pth +3 -0
- joint_model/checkpoint-1395/rng_state_3.pth +3 -0
- joint_model/checkpoint-1395/rng_state_4.pth +3 -0
- joint_model/checkpoint-1395/rng_state_5.pth +3 -0
- joint_model/checkpoint-1395/rng_state_6.pth +3 -0
- joint_model/checkpoint-1395/rng_state_7.pth +3 -0
- joint_model/checkpoint-1395/scheduler.pt +3 -0
- joint_model/checkpoint-1395/sentencepiece.bpe.model +3 -0
- joint_model/checkpoint-1395/special_tokens_map.json +15 -0
- joint_model/checkpoint-1395/tokenizer.json +3 -0
- joint_model/checkpoint-1395/tokenizer_config.json +55 -0
- joint_model/checkpoint-1395/trainer_state.json +108 -0
- joint_model/checkpoint-1395/training_args.bin +3 -0
- joint_model/checkpoint-1674/config.json +27 -0
- joint_model/checkpoint-1674/model.safetensors +3 -0
- joint_model/checkpoint-1674/optimizer.pt +3 -0
- joint_model/checkpoint-1674/rng_state_0.pth +3 -0
- joint_model/checkpoint-1674/rng_state_1.pth +3 -0
- joint_model/checkpoint-1674/rng_state_2.pth +3 -0
- joint_model/checkpoint-1674/rng_state_3.pth +3 -0
- joint_model/checkpoint-1674/rng_state_4.pth +3 -0
- joint_model/checkpoint-1674/rng_state_5.pth +3 -0
- joint_model/checkpoint-1674/rng_state_6.pth +3 -0
- joint_model/checkpoint-1674/rng_state_7.pth +3 -0
- joint_model/checkpoint-1674/scheduler.pt +3 -0
- joint_model/checkpoint-1674/sentencepiece.bpe.model +3 -0
.gitattributes
CHANGED
|
@@ -38,3 +38,13 @@ xlm/hierarchical_xlm_roberta/checkpoint-2688/tokenizer.json filter=lfs diff=lfs
|
|
| 38 |
xlm/hierarchical_xlm_roberta/checkpoint-3584/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
xlm/hierarchical_xlm_roberta/checkpoint-4480/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
xlm/hierarchical_xlm_roberta/checkpoint-896/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
xlm/hierarchical_xlm_roberta/checkpoint-3584/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
xlm/hierarchical_xlm_roberta/checkpoint-4480/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
xlm/hierarchical_xlm_roberta/checkpoint-896/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
joint_model/checkpoint-1116/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
joint_model/checkpoint-1395/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
joint_model/checkpoint-1674/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
joint_model/checkpoint-1953/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
joint_model/checkpoint-2232/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
joint_model/checkpoint-2511/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
joint_model/checkpoint-279/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
joint_model/checkpoint-2790/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
joint_model/checkpoint-558/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
joint_model/checkpoint-837/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
joint_model/checkpoint-1116/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"HierarchicalXLMRoberta"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_norm_eps": 1e-05,
|
| 16 |
+
"max_position_embeddings": 514,
|
| 17 |
+
"model_type": "xlm-roberta",
|
| 18 |
+
"num_attention_heads": 12,
|
| 19 |
+
"num_hidden_layers": 12,
|
| 20 |
+
"output_past": true,
|
| 21 |
+
"pad_token_id": 1,
|
| 22 |
+
"position_embedding_type": "absolute",
|
| 23 |
+
"transformers_version": "4.56.1",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
joint_model/checkpoint-1116/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6792b2f6f167bf98323526de92da30fc30176242f8e06b2af08a1f76528af6d2
|
| 3 |
+
size 1112408092
|
joint_model/checkpoint-1116/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09fab47c4a460fe4a9c21057cf29f5557d19a2d723b628bc0898a267dadaa022
|
| 3 |
+
size 2224937355
|
joint_model/checkpoint-1116/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c2ca6cda30f0e67f92af2785341362f9fd75975cfa0e1e3edf170b31356982d
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e19c421d330efe795fe888269f2979bdecc155b8ec1bae695ab646f830eda58
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4bfe1bdb6ffcc11de3dfa36f4f902a025948dd1a9d55f9e2a0c37a0c71d8993
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7db4f6f2db3a4f2891f0c6123c6790aa8182a24731540060e1c5162bdfcf332
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/rng_state_4.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e325614869957231c07b72ed17ee0c0dec786269ff93abf17b16883c45761f74
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/rng_state_5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f06f2380c4f07e128133b104b687744df3b4064ecc4b332886d237fcca20b844
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/rng_state_6.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6806143edb28cfaf20fbcd82eda60303d3570b7414ff8996f022cf2b33990496
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/rng_state_7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd5cdf368ea3798bd2e333443bb2d4214053bef95ce8af4541fba9630a6adda4
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1116/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d771b00dd64d2f869efc678cde233c82e3d5d80f32fc0afc37d60c16f5276a7
|
| 3 |
+
size 1465
|
joint_model/checkpoint-1116/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
joint_model/checkpoint-1116/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
joint_model/checkpoint-1116/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
|
| 3 |
+
size 17082734
|
joint_model/checkpoint-1116/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
joint_model/checkpoint-1116/trainer_state.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1116,
|
| 3 |
+
"best_metric": 0.3113965690135956,
|
| 4 |
+
"best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1116",
|
| 5 |
+
"epoch": 4.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 1116,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy_level1": 0.943,
|
| 15 |
+
"eval_accuracy_level2": 0.8282,
|
| 16 |
+
"eval_f1_level1": 0.9424784900051851,
|
| 17 |
+
"eval_f1_level2": 0.7894811362618394,
|
| 18 |
+
"eval_loss": 1.1101479530334473,
|
| 19 |
+
"eval_runtime": 0.6677,
|
| 20 |
+
"eval_samples_per_second": 7488.47,
|
| 21 |
+
"eval_steps_per_second": 14.977,
|
| 22 |
+
"step": 279
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"epoch": 1.7921146953405018,
|
| 26 |
+
"grad_norm": 8.197423934936523,
|
| 27 |
+
"learning_rate": 1.642293906810036e-05,
|
| 28 |
+
"loss": 2.029,
|
| 29 |
+
"step": 500
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 2.0,
|
| 33 |
+
"eval_accuracy_level1": 0.963,
|
| 34 |
+
"eval_accuracy_level2": 0.9134,
|
| 35 |
+
"eval_f1_level1": 0.962976281751424,
|
| 36 |
+
"eval_f1_level2": 0.9010101771001547,
|
| 37 |
+
"eval_loss": 0.5648184418678284,
|
| 38 |
+
"eval_runtime": 0.6225,
|
| 39 |
+
"eval_samples_per_second": 8032.157,
|
| 40 |
+
"eval_steps_per_second": 16.064,
|
| 41 |
+
"step": 558
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"epoch": 3.0,
|
| 45 |
+
"eval_accuracy_level1": 0.9684,
|
| 46 |
+
"eval_accuracy_level2": 0.9404,
|
| 47 |
+
"eval_f1_level1": 0.9683869415305786,
|
| 48 |
+
"eval_f1_level2": 0.9353991249189201,
|
| 49 |
+
"eval_loss": 0.3886409401893616,
|
| 50 |
+
"eval_runtime": 0.7756,
|
| 51 |
+
"eval_samples_per_second": 6447.003,
|
| 52 |
+
"eval_steps_per_second": 12.894,
|
| 53 |
+
"step": 837
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"epoch": 3.5842293906810037,
|
| 57 |
+
"grad_norm": 9.112466812133789,
|
| 58 |
+
"learning_rate": 1.2838709677419356e-05,
|
| 59 |
+
"loss": 0.5009,
|
| 60 |
+
"step": 1000
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 4.0,
|
| 64 |
+
"eval_accuracy_level1": 0.976,
|
| 65 |
+
"eval_accuracy_level2": 0.948,
|
| 66 |
+
"eval_f1_level1": 0.9759892345948809,
|
| 67 |
+
"eval_f1_level2": 0.944852130743217,
|
| 68 |
+
"eval_loss": 0.3113965690135956,
|
| 69 |
+
"eval_runtime": 0.7752,
|
| 70 |
+
"eval_samples_per_second": 6449.701,
|
| 71 |
+
"eval_steps_per_second": 12.899,
|
| 72 |
+
"step": 1116
|
| 73 |
+
}
|
| 74 |
+
],
|
| 75 |
+
"logging_steps": 500,
|
| 76 |
+
"max_steps": 2790,
|
| 77 |
+
"num_input_tokens_seen": 0,
|
| 78 |
+
"num_train_epochs": 10,
|
| 79 |
+
"save_steps": 500,
|
| 80 |
+
"stateful_callbacks": {
|
| 81 |
+
"TrainerControl": {
|
| 82 |
+
"args": {
|
| 83 |
+
"should_epoch_stop": false,
|
| 84 |
+
"should_evaluate": false,
|
| 85 |
+
"should_log": false,
|
| 86 |
+
"should_save": true,
|
| 87 |
+
"should_training_stop": false
|
| 88 |
+
},
|
| 89 |
+
"attributes": {}
|
| 90 |
+
}
|
| 91 |
+
},
|
| 92 |
+
"total_flos": 2.9380585063448576e+16,
|
| 93 |
+
"train_batch_size": 64,
|
| 94 |
+
"trial_name": null,
|
| 95 |
+
"trial_params": null
|
| 96 |
+
}
|
joint_model/checkpoint-1116/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
|
| 3 |
+
size 5777
|
joint_model/checkpoint-1395/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"HierarchicalXLMRoberta"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_norm_eps": 1e-05,
|
| 16 |
+
"max_position_embeddings": 514,
|
| 17 |
+
"model_type": "xlm-roberta",
|
| 18 |
+
"num_attention_heads": 12,
|
| 19 |
+
"num_hidden_layers": 12,
|
| 20 |
+
"output_past": true,
|
| 21 |
+
"pad_token_id": 1,
|
| 22 |
+
"position_embedding_type": "absolute",
|
| 23 |
+
"transformers_version": "4.56.1",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
joint_model/checkpoint-1395/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c41e0216d9d3862040536a7fd9ba6b254a014777b0d7f1662f492db7a971f406
|
| 3 |
+
size 1112408092
|
joint_model/checkpoint-1395/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5607b6daf0f0a370ccef9270477c2e9a830246c8af0dd0c7fe8549c739d2e9ac
|
| 3 |
+
size 2224937355
|
joint_model/checkpoint-1395/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:372eabe9ee1b698bd155ed64252bb52ecd85f363df08d4a6c4512f0e000cb9b7
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d00e489391354b35849c16d9fa756e7012f5711d57ab1683f71c55ef187b9dd2
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a0cf5772222d7ef83bf5bfa008bd16ddb5100e0b47d95a97be765c501e4e3cc
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:622b3cd66dcaf83e587a5e0c31a6797932cb588d1367575466e0b8eac0b6b732
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/rng_state_4.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dfb97eabb2e40d9e6d49009ad4b964b2c73fa42fbf32c8b1f59ae56bf87d92e
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/rng_state_5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c2e0af24431f9c87afbbee452b8c4cb68e55978cc475aca99862285217c6f8a
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/rng_state_6.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c209482d19626584d72a324c23675cfbc298544cbd62b33073d59f67aa1d16e
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/rng_state_7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1547a4b17eaa25162d8c2232659379d5dc26ac63c151280e984e29b60718d591
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1395/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f9f1682436aaf9d161d72dc7e582154d9fbe0f817b46fd9cdc83dc2d8f4ef8f
|
| 3 |
+
size 1465
|
joint_model/checkpoint-1395/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
joint_model/checkpoint-1395/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
joint_model/checkpoint-1395/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
|
| 3 |
+
size 17082734
|
joint_model/checkpoint-1395/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
joint_model/checkpoint-1395/trainer_state.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1395,
|
| 3 |
+
"best_metric": 0.2682338356971741,
|
| 4 |
+
"best_model_checkpoint": "./checkpoints/joint_model_robust/checkpoint-1395",
|
| 5 |
+
"epoch": 5.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 1395,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy_level1": 0.943,
|
| 15 |
+
"eval_accuracy_level2": 0.8282,
|
| 16 |
+
"eval_f1_level1": 0.9424784900051851,
|
| 17 |
+
"eval_f1_level2": 0.7894811362618394,
|
| 18 |
+
"eval_loss": 1.1101479530334473,
|
| 19 |
+
"eval_runtime": 0.6677,
|
| 20 |
+
"eval_samples_per_second": 7488.47,
|
| 21 |
+
"eval_steps_per_second": 14.977,
|
| 22 |
+
"step": 279
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"epoch": 1.7921146953405018,
|
| 26 |
+
"grad_norm": 8.197423934936523,
|
| 27 |
+
"learning_rate": 1.642293906810036e-05,
|
| 28 |
+
"loss": 2.029,
|
| 29 |
+
"step": 500
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"epoch": 2.0,
|
| 33 |
+
"eval_accuracy_level1": 0.963,
|
| 34 |
+
"eval_accuracy_level2": 0.9134,
|
| 35 |
+
"eval_f1_level1": 0.962976281751424,
|
| 36 |
+
"eval_f1_level2": 0.9010101771001547,
|
| 37 |
+
"eval_loss": 0.5648184418678284,
|
| 38 |
+
"eval_runtime": 0.6225,
|
| 39 |
+
"eval_samples_per_second": 8032.157,
|
| 40 |
+
"eval_steps_per_second": 16.064,
|
| 41 |
+
"step": 558
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"epoch": 3.0,
|
| 45 |
+
"eval_accuracy_level1": 0.9684,
|
| 46 |
+
"eval_accuracy_level2": 0.9404,
|
| 47 |
+
"eval_f1_level1": 0.9683869415305786,
|
| 48 |
+
"eval_f1_level2": 0.9353991249189201,
|
| 49 |
+
"eval_loss": 0.3886409401893616,
|
| 50 |
+
"eval_runtime": 0.7756,
|
| 51 |
+
"eval_samples_per_second": 6447.003,
|
| 52 |
+
"eval_steps_per_second": 12.894,
|
| 53 |
+
"step": 837
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"epoch": 3.5842293906810037,
|
| 57 |
+
"grad_norm": 9.112466812133789,
|
| 58 |
+
"learning_rate": 1.2838709677419356e-05,
|
| 59 |
+
"loss": 0.5009,
|
| 60 |
+
"step": 1000
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 4.0,
|
| 64 |
+
"eval_accuracy_level1": 0.976,
|
| 65 |
+
"eval_accuracy_level2": 0.948,
|
| 66 |
+
"eval_f1_level1": 0.9759892345948809,
|
| 67 |
+
"eval_f1_level2": 0.944852130743217,
|
| 68 |
+
"eval_loss": 0.3113965690135956,
|
| 69 |
+
"eval_runtime": 0.7752,
|
| 70 |
+
"eval_samples_per_second": 6449.701,
|
| 71 |
+
"eval_steps_per_second": 12.899,
|
| 72 |
+
"step": 1116
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 5.0,
|
| 76 |
+
"eval_accuracy_level1": 0.9786,
|
| 77 |
+
"eval_accuracy_level2": 0.9574,
|
| 78 |
+
"eval_f1_level1": 0.9786086703124734,
|
| 79 |
+
"eval_f1_level2": 0.9558414971682437,
|
| 80 |
+
"eval_loss": 0.2682338356971741,
|
| 81 |
+
"eval_runtime": 0.7265,
|
| 82 |
+
"eval_samples_per_second": 6882.558,
|
| 83 |
+
"eval_steps_per_second": 13.765,
|
| 84 |
+
"step": 1395
|
| 85 |
+
}
|
| 86 |
+
],
|
| 87 |
+
"logging_steps": 500,
|
| 88 |
+
"max_steps": 2790,
|
| 89 |
+
"num_input_tokens_seen": 0,
|
| 90 |
+
"num_train_epochs": 10,
|
| 91 |
+
"save_steps": 500,
|
| 92 |
+
"stateful_callbacks": {
|
| 93 |
+
"TrainerControl": {
|
| 94 |
+
"args": {
|
| 95 |
+
"should_epoch_stop": false,
|
| 96 |
+
"should_evaluate": false,
|
| 97 |
+
"should_log": false,
|
| 98 |
+
"should_save": true,
|
| 99 |
+
"should_training_stop": false
|
| 100 |
+
},
|
| 101 |
+
"attributes": {}
|
| 102 |
+
}
|
| 103 |
+
},
|
| 104 |
+
"total_flos": 3.672573129575629e+16,
|
| 105 |
+
"train_batch_size": 64,
|
| 106 |
+
"trial_name": null,
|
| 107 |
+
"trial_params": null
|
| 108 |
+
}
|
joint_model/checkpoint-1395/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c61cfce4a431b5c6aaf6df7e91cc42eb1031611f44550485e4e6ebe452339c18
|
| 3 |
+
size 5777
|
joint_model/checkpoint-1674/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"HierarchicalXLMRoberta"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_norm_eps": 1e-05,
|
| 16 |
+
"max_position_embeddings": 514,
|
| 17 |
+
"model_type": "xlm-roberta",
|
| 18 |
+
"num_attention_heads": 12,
|
| 19 |
+
"num_hidden_layers": 12,
|
| 20 |
+
"output_past": true,
|
| 21 |
+
"pad_token_id": 1,
|
| 22 |
+
"position_embedding_type": "absolute",
|
| 23 |
+
"transformers_version": "4.56.1",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
joint_model/checkpoint-1674/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07732aadf5ff31e4f11ced15e3b3f675dcec364a1c0b188e03180a6719bbc0bc
|
| 3 |
+
size 1112408092
|
joint_model/checkpoint-1674/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c772fa9b4c710c806b65a2db5c8842dd78cbf61f45a0b88ae50f5bedc42508d0
|
| 3 |
+
size 2224937355
|
joint_model/checkpoint-1674/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:244fd61bffb73015ca283aaf0b690244b4bf656c6c488f789153206cc5ca419c
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a85bf43502cd59c3e7002fdd28d84f2755d1d50b9f8395ece5cce57a33e6a2de
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f36f28619c5d75933e40cf2ed1e4be028945b0f4185cc9e11476f543d2e8d7c
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b838f4cf0f902e11d576f4b832468a92c12d05620e73c537a7302c86e09c2752
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/rng_state_4.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c946fcb7164ab82957cc2fd2229fec1ea962b6eb3608757ad9302a5956a5782
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/rng_state_5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1498a3eb9fd6a568db26513c62c91ef104aa0da2637df120f18cbd8604a6fefc
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/rng_state_6.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f449c8299ada96be66c0e08d0603887c01bcef56d3f3c2bf63a0fc8a43664aa
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/rng_state_7.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a2a661a72b9f31cca93340f54a4c320474844987c784955cd6cf6c6f2d2cd65
|
| 3 |
+
size 16389
|
joint_model/checkpoint-1674/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e9460e5ee67dc8bfa729d93fdbc410d0dd92c60badc5c6eac4d11137e2d9011
|
| 3 |
+
size 1465
|
joint_model/checkpoint-1674/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|