sleepyhead111 commited on
Commit
f9109fb
·
verified ·
1 Parent(s): a8b45d3

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -52,3 +52,5 @@ trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phras
52
  trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
53
  trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
54
  trans_fairseq/mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
 
 
 
52
  trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
53
  trans_fairseq/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
54
  trans_fairseq/mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
55
+ trans_fairseq/data/en-zh/wmt23/train.en-zh.en.idx filter=lfs diff=lfs merge=lfs -text
56
+ trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.idx filter=lfs diff=lfs merge=lfs -text
trans_fairseq/data/en-zh/wmt23/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
trans_fairseq/data/en-zh/wmt23/dict.zh.txt ADDED
The diff for this file is too large to render. See raw diff
 
trans_fairseq/data/en-zh/wmt23/preprocess.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=30, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='zh', trainpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train', validpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid', testpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23', align_suffix=None, destdir='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data_1', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpecode_32k/bpecode.zh', srcdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpecode_32k/bpecode.en', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=30, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='zh', trainpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train', validpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid', testpref='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22,/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23', align_suffix=None, destdir='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data_1', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data/dict.zh.txt', srcdict='/mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
3
+ [en] Dictionary: 46040 types
4
+ [en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train.en: 33431411 sents, 890241636 tokens, 0.0% replaced by <unk>
5
+ [en] Dictionary: 46040 types
6
+ [en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid.en: 1999 sents, 59177 tokens, 0.0% replaced by <unk>
7
+ [en] Dictionary: 46040 types
8
+ [en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores.en: 1012 sents, 28474 tokens, 0.00702% replaced by <unk>
9
+ [en] Dictionary: 46040 types
10
+ [en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22.en: 2037 sents, 44690 tokens, 0.00224% replaced by <unk>
11
+ [en] Dictionary: 46040 types
12
+ [en] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23.en: 2074 sents, 47187 tokens, 0.0% replaced by <unk>
13
+ [zh] Dictionary: 60432 types
14
+ [zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.train.zh: 33431411 sents, 816506971 tokens, 0.0% replaced by <unk>
15
+ [zh] Dictionary: 60432 types
16
+ [zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.valid.zh: 1999 sents, 57690 tokens, 0.00347% replaced by <unk>
17
+ [zh] Dictionary: 60432 types
18
+ [zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.flores.zh: 1012 sents, 27872 tokens, 0.0% replaced by <unk>
19
+ [zh] Dictionary: 60432 types
20
+ [zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt22.zh: 2037 sents, 41432 tokens, 0.0% replaced by <unk>
21
+ [zh] Dictionary: 60432 types
22
+ [zh] /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/bpe/bpe.test.wmt23.zh: 2074 sents, 44353 tokens, 0.0% replaced by <unk>
23
+ Wrote preprocessed data to /mnt/ouyangyx/trans_fairseq/nmt/data/en2zh/wmt23-50M/trainable_data_1
trans_fairseq/data/en-zh/wmt23/test.en-zh.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867ac85e7f406503543f321cbd7a405dae152cdeac5f6fd0950bb397384d2ced
3
+ size 56948
trans_fairseq/data/en-zh/wmt23/test.en-zh.en.idx ADDED
Binary file (12.2 kB). View file
 
trans_fairseq/data/en-zh/wmt23/test.en-zh.zh.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84712af5733d5eda0cbe69af485084b19191cc24fbcf8984d1c4ef639fa341f
3
+ size 55744
trans_fairseq/data/en-zh/wmt23/test.en-zh.zh.idx ADDED
Binary file (12.2 kB). View file
 
trans_fairseq/data/en-zh/wmt23/test1.en-zh.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfebd0f79af08680cebbfdb599d8e824ee161d3b1f1b36c3aec99a2293925cc3
3
+ size 89380
trans_fairseq/data/en-zh/wmt23/test1.en-zh.en.idx ADDED
Binary file (24.5 kB). View file
 
trans_fairseq/data/en-zh/wmt23/test1.en-zh.zh.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42f8e80ba32e884a59b6963d1102f5eeb42810a57166006f9a4413dc16afeb64
3
+ size 82864
trans_fairseq/data/en-zh/wmt23/test1.en-zh.zh.idx ADDED
Binary file (24.5 kB). View file
 
trans_fairseq/data/en-zh/wmt23/test2.en-zh.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a28f1dd8db02de2d26477c4b24b6e1d0fb0a116b091c33529f379dc244ef0600
3
+ size 94374
trans_fairseq/data/en-zh/wmt23/test2.en-zh.en.idx ADDED
Binary file (24.9 kB). View file
 
trans_fairseq/data/en-zh/wmt23/test2.en-zh.zh.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d8a03574fd680d475411735a32636e28db49b87de1d209eef0997f6611a94b2
3
+ size 88706
trans_fairseq/data/en-zh/wmt23/test2.en-zh.zh.idx ADDED
Binary file (24.9 kB). View file
 
trans_fairseq/data/en-zh/wmt23/train.en-zh.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51f35e7a186d7de63a6c01a74ca78beff24deb7f2f16f002a56dc633abbcf697
3
+ size 1780483272
trans_fairseq/data/en-zh/wmt23/train.en-zh.en.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b97f2050d4677afbb222b2fd524d70ef77c80e3f6fde9008f1cd6b84f7c1bcd
3
+ size 401176958
trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e3aa832e56ba0675a633ce220066207564e7e759f187f13db637f220db43bef
3
+ size 1633013942
trans_fairseq/data/en-zh/wmt23/train.en-zh.zh.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7b240359c87dce39250e63a0cfe4682af65ea24aff34f9f88175a7f53282dc5
3
+ size 401176958
trans_fairseq/data/en-zh/wmt23/valid.en-zh.en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c64536e0a89d950bff17c0bbec38dbc0410b8ea3270682ed369de48683243c
3
+ size 118354
trans_fairseq/data/en-zh/wmt23/valid.en-zh.en.idx ADDED
Binary file (24 kB). View file
 
trans_fairseq/data/en-zh/wmt23/valid.en-zh.zh.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d7473da186539de42dc5e1a975234ce989d736df36ae00aa36c11f41c675a3e
3
+ size 115380
trans_fairseq/data/en-zh/wmt23/valid.en-zh.zh.idx ADDED
Binary file (24 kB). View file