Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- trans_fairseq/data/en-zh/wmt23/dict.en.txt +0 -0
- trans_fairseq/data/en-zh/wmt23/dict.zh.txt +0 -0
- trans_fairseq/data/en-zh/wmt23/preprocess.log +23 -0
- trans_fairseq/data/en-zh/wmt23/test.en-zh.en.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/test.en-zh.en.idx +0 -0
- trans_fairseq/data/en-zh/wmt23/test.en-zh.zh.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/test.en-zh.zh.idx +0 -0
- trans_fairseq/data/en-zh/wmt23/test1.en-zh.en.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/test1.en-zh.en.idx +0 -0
- trans_fairseq/data/en-zh/wmt23/test1.en-zh.zh.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/test1.en-zh.zh.idx +0 -0
- trans_fairseq/data/en-zh/wmt23/test2.en-zh.en.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/test2.en-zh.en.idx +0 -0
- trans_fairseq/data/en-zh/wmt23/test2.en-zh.zh.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/test2.en-zh.zh.idx +0 -0
- trans_fairseq/data/en-zh/wmt23/train.en-zh.en.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/train.en-zh.en.idx +3 -0
- trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.idx +3 -0
- trans_fairseq/data/en-zh/wmt23/valid.en-zh.en.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/valid.en-zh.en.idx +0 -0
- trans_fairseq/data/en-zh/wmt23/valid.en-zh.zh.bin +3 -0
- trans_fairseq/data/en-zh/wmt23/valid.en-zh.zh.idx +0 -0
.gitattributes
CHANGED
|
@@ -52,3 +52,5 @@ trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phras
|
|
| 52 |
trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
| 53 |
trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
| 54 |
trans_fairseq/mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 52 |
trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
| 53 |
trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
| 54 |
trans_fairseq/mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
trans_fairseq/data/en-zh/wmt23/train.en-zh.en.idx filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.idx filter=lfs diff=lfs merge=lfs -text
|
trans_fairseq/data/en-zh/wmt23/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
trans_fairseq/data/en-zh/wmt23/dict.zh.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
trans_fairseq/data/en-zh/wmt23/preprocess.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=30, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='zh', trainpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train', validpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid', testpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23', align_suffix=None, destdir='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data_1', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpecode_32k/bpecode.zh', srcdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpecode_32k/bpecode.en', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=30, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='zh', trainpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train', validpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid', testpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23', align_suffix=None, destdir='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data_1', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data/dict.zh.txt', srcdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 3 |
+
[en] Dictionary: 46040 types
|
| 4 |
+
[en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train.en: 33431411 sents, 890241636 tokens, 0.0% replaced by <unk>
|
| 5 |
+
[en] Dictionary: 46040 types
|
| 6 |
+
[en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid.en: 1999 sents, 59177 tokens, 0.0% replaced by <unk>
|
| 7 |
+
[en] Dictionary: 46040 types
|
| 8 |
+
[en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores.en: 1012 sents, 28474 tokens, 0.00702% replaced by <unk>
|
| 9 |
+
[en] Dictionary: 46040 types
|
| 10 |
+
[en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22.en: 2037 sents, 44690 tokens, 0.00224% replaced by <unk>
|
| 11 |
+
[en] Dictionary: 46040 types
|
| 12 |
+
[en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23.en: 2074 sents, 47187 tokens, 0.0% replaced by <unk>
|
| 13 |
+
[zh] Dictionary: 60432 types
|
| 14 |
+
[zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train.zh: 33431411 sents, 816506971 tokens, 0.0% replaced by <unk>
|
| 15 |
+
[zh] Dictionary: 60432 types
|
| 16 |
+
[zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid.zh: 1999 sents, 57690 tokens, 0.00347% replaced by <unk>
|
| 17 |
+
[zh] Dictionary: 60432 types
|
| 18 |
+
[zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores.zh: 1012 sents, 27872 tokens, 0.0% replaced by <unk>
|
| 19 |
+
[zh] Dictionary: 60432 types
|
| 20 |
+
[zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22.zh: 2037 sents, 41432 tokens, 0.0% replaced by <unk>
|
| 21 |
+
[zh] Dictionary: 60432 types
|
| 22 |
+
[zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23.zh: 2074 sents, 44353 tokens, 0.0% replaced by <unk>
|
| 23 |
+
Wrote preprocessed data to /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data_1
|
trans_fairseq/data/en-zh/wmt23/test.en-zh.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:867ac85e7f406503543f321cbd7a405dae152cdeac5f6fd0950bb397384d2ced
|
| 3 |
+
size 56948
|
trans_fairseq/data/en-zh/wmt23/test.en-zh.en.idx
ADDED
|
Binary file (12.2 kB). View file
|
|
|
trans_fairseq/data/en-zh/wmt23/test.en-zh.zh.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c84712af5733d5eda0cbe69af485084b19191cc24fbcf8984d1c4ef639fa341f
|
| 3 |
+
size 55744
|
trans_fairseq/data/en-zh/wmt23/test.en-zh.zh.idx
ADDED
|
Binary file (12.2 kB). View file
|
|
|
trans_fairseq/data/en-zh/wmt23/test1.en-zh.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfebd0f79af08680cebbfdb599d8e824ee161d3b1f1b36c3aec99a2293925cc3
|
| 3 |
+
size 89380
|
trans_fairseq/data/en-zh/wmt23/test1.en-zh.en.idx
ADDED
|
Binary file (24.5 kB). View file
|
|
|
trans_fairseq/data/en-zh/wmt23/test1.en-zh.zh.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42f8e80ba32e884a59b6963d1102f5eeb42810a57166006f9a4413dc16afeb64
|
| 3 |
+
size 82864
|
trans_fairseq/data/en-zh/wmt23/test1.en-zh.zh.idx
ADDED
|
Binary file (24.5 kB). View file
|
|
|
trans_fairseq/data/en-zh/wmt23/test2.en-zh.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a28f1dd8db02de2d26477c4b24b6e1d0fb0a116b091c33529f379dc244ef0600
|
| 3 |
+
size 94374
|
trans_fairseq/data/en-zh/wmt23/test2.en-zh.en.idx
ADDED
|
Binary file (24.9 kB). View file
|
|
|
trans_fairseq/data/en-zh/wmt23/test2.en-zh.zh.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d8a03574fd680d475411735a32636e28db49b87de1d209eef0997f6611a94b2
|
| 3 |
+
size 88706
|
trans_fairseq/data/en-zh/wmt23/test2.en-zh.zh.idx
ADDED
|
Binary file (24.9 kB). View file
|
|
|
trans_fairseq/data/en-zh/wmt23/train.en-zh.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51f35e7a186d7de63a6c01a74ca78beff24deb7f2f16f002a56dc633abbcf697
|
| 3 |
+
size 1780483272
|
trans_fairseq/data/en-zh/wmt23/train.en-zh.en.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b97f2050d4677afbb222b2fd524d70ef77c80e3f6fde9008f1cd6b84f7c1bcd
|
| 3 |
+
size 401176958
|
trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e3aa832e56ba0675a633ce220066207564e7e759f187f13db637f220db43bef
|
| 3 |
+
size 1633013942
|
trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.idx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7b240359c87dce39250e63a0cfe4682af65ea24aff34f9f88175a7f53282dc5
|
| 3 |
+
size 401176958
|
trans_fairseq/data/en-zh/wmt23/valid.en-zh.en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49c64536e0a89d950bff17c0bbec38dbc0410b8ea3270682ed369de48683243c
|
| 3 |
+
size 118354
|
trans_fairseq/data/en-zh/wmt23/valid.en-zh.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
trans_fairseq/data/en-zh/wmt23/valid.en-zh.zh.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d7473da186539de42dc5e1a975234ce989d736df36ae00aa36c11f41c675a3e
|
| 3 |
+
size 115380
|
trans_fairseq/data/en-zh/wmt23/valid.en-zh.zh.idx
ADDED
|
Binary file (24 kB). View file
|
|
|