agentlans committed on
Commit
0676873
·
verified ·
1 Parent(s): ffec1ea

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ source.spm filter=lfs diff=lfs merge=lfs -text
37
+ target.spm filter=lfs diff=lfs merge=lfs -text
all_results.json CHANGED
@@ -1,11 +1,16 @@
1
  {
2
- "epoch": 3.0,
3
- "num_input_tokens_seen": 67927868,
4
- "total_flos": 1.7989403580039168e+16,
5
- "train_loss": 1.1922986107342055,
6
- "train_runtime": 2155.3414,
7
- "train_samples": 310916,
8
- "train_samples_per_second": 432.761,
9
- "train_steps_per_second": 54.096,
10
- "train_tokens_per_second": 31520.605
 
 
 
 
 
11
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.961527705192566,
4
+ "eval_runtime": 77.3959,
5
+ "eval_samples": 83311,
6
+ "eval_samples_per_second": 1076.426,
7
+ "eval_steps_per_second": 134.555,
8
+ "num_input_tokens_seen": 225160303,
9
+ "total_flos": 5.962942279994573e+16,
10
+ "train_loss": 1.8857571468166456,
11
+ "train_runtime": 16282.8843,
12
+ "train_samples": 333243,
13
+ "train_samples_per_second": 204.658,
14
+ "train_steps_per_second": 25.583,
15
+ "train_tokens_per_second": 13822.702
16
  }
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "Helsinki-NLP/opus-mt-en-zh",
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
@@ -8,11 +7,6 @@
8
  "MarianMTModel"
9
  ],
10
  "attention_dropout": 0.0,
11
- "bad_words_ids": [
12
- [
13
- 65000
14
- ]
15
- ],
16
  "bos_token_id": 0,
17
  "classif_dropout": 0.0,
18
  "classifier_dropout": 0.0,
@@ -46,19 +40,19 @@
46
  "LABEL_1": 1,
47
  "LABEL_2": 2
48
  },
49
- "max_length": 512,
50
  "max_position_embeddings": 512,
51
  "model_type": "marian",
52
  "normalize_before": false,
53
  "normalize_embedding": false,
54
- "num_beams": 4,
55
  "num_hidden_layers": 6,
56
  "pad_token_id": 65000,
57
  "scale_embedding": true,
58
  "share_encoder_decoder_embeddings": true,
59
  "static_position_embeddings": true,
60
  "torch_dtype": "float32",
61
- "transformers_version": "4.43.3",
62
  "use_cache": true,
63
  "vocab_size": 65001
64
  }
 
1
  {
 
2
  "activation_dropout": 0.0,
3
  "activation_function": "swish",
4
  "add_bias_logits": false,
 
7
  "MarianMTModel"
8
  ],
9
  "attention_dropout": 0.0,
 
 
 
 
 
10
  "bos_token_id": 0,
11
  "classif_dropout": 0.0,
12
  "classifier_dropout": 0.0,
 
40
  "LABEL_1": 1,
41
  "LABEL_2": 2
42
  },
43
+ "max_length": null,
44
  "max_position_embeddings": 512,
45
  "model_type": "marian",
46
  "normalize_before": false,
47
  "normalize_embedding": false,
48
+ "num_beams": null,
49
  "num_hidden_layers": 6,
50
  "pad_token_id": 65000,
51
  "scale_embedding": true,
52
  "share_encoder_decoder_embeddings": true,
53
  "static_position_embeddings": true,
54
  "torch_dtype": "float32",
55
+ "transformers_version": "4.51.3",
56
  "use_cache": true,
57
  "vocab_size": 65001
58
  }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.961527705192566,
4
+ "eval_runtime": 77.3959,
5
+ "eval_samples": 83311,
6
+ "eval_samples_per_second": 1076.426,
7
+ "eval_steps_per_second": 134.555,
8
+ "num_input_tokens_seen": 225160303
9
+ }
generation_config.json CHANGED
@@ -12,5 +12,5 @@
12
  "num_beams": 4,
13
  "pad_token_id": 65000,
14
  "renormalize_logits": true,
15
- "transformers_version": "4.43.3"
16
  }
 
12
  "num_beams": 4,
13
  "pad_token_id": 65000,
14
  "renormalize_logits": true,
15
+ "transformers_version": "4.51.3"
16
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2527b97d2fa376dbfe40f7b62f5adbf21e09020a8d156d2cc43e7773b2b565cd
3
  size 309965092
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aac5f96b0d1c2435c122ac11eef69a78fde5924dbd76351f1a233d6c0f28037c
3
  size 309965092
source.spm CHANGED
Binary files a/source.spm and b/source.spm differ
 
target.spm CHANGED
Binary files a/target.spm and b/target.spm differ
 
tokenizer_config.json CHANGED
@@ -25,8 +25,9 @@
25
  "special": true
26
  }
27
  },
28
- "clean_up_tokenization_spaces": true,
29
  "eos_token": "</s>",
 
30
  "model_max_length": 512,
31
  "pad_token": "<pad>",
32
  "separate_vocabs": false,
 
25
  "special": true
26
  }
27
  },
28
+ "clean_up_tokenization_spaces": false,
29
  "eos_token": "</s>",
30
+ "extra_special_tokens": {},
31
  "model_max_length": 512,
32
  "pad_token": "<pad>",
33
  "separate_vocabs": false,
train_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 3.0,
3
- "num_input_tokens_seen": 67927868,
4
- "total_flos": 1.7989403580039168e+16,
5
- "train_loss": 1.1922986107342055,
6
- "train_runtime": 2155.3414,
7
- "train_samples": 310916,
8
- "train_samples_per_second": 432.761,
9
- "train_steps_per_second": 54.096,
10
- "train_tokens_per_second": 31520.605
11
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "num_input_tokens_seen": 225160303,
4
+ "total_flos": 5.962942279994573e+16,
5
+ "train_loss": 1.8857571468166456,
6
+ "train_runtime": 16282.8843,
7
+ "train_samples": 333243,
8
+ "train_samples_per_second": 204.658,
9
+ "train_steps_per_second": 25.583,
10
+ "train_tokens_per_second": 13822.702
11
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2ab7adeb91926a4ec4ef2638ee7aa6be062e73ea336464fe5971f4ecd328639
3
- size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:742b6e553683f5a8be3739142255defff5d894b12172be0396ab15afd216cd8a
3
+ size 5496