Biu3010 committed on
Commit
ed866d4
·
verified ·
1 Parent(s): a84a26e

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<central>": 250055,
3
+ "<northern>": 250054,
4
+ "<southern>": 250056
5
+ }
config.json CHANGED
@@ -53,5 +53,5 @@
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "transformers_version": "4.57.1",
55
  "use_cache": true,
56
- "vocab_size": 250054
57
  }
 
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "transformers_version": "4.57.1",
55
  "use_cache": true,
56
+ "vocab_size": 250057
57
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4330486b9db12a44daece2a06925a3529d6ecf737b89548b7e8ab448f7491fde
3
- size 2444578688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce6f5e772d40e23373efb6a3cd4218cf190e687a5100cdede10cf2b5f86285f5
3
+ size 2444590988
special_tokens_map.json CHANGED
@@ -1,57 +1,26 @@
1
  {
2
  "additional_special_tokens": [
3
- "ar_AR",
4
- "cs_CZ",
5
- "de_DE",
6
- "en_XX",
7
- "es_XX",
8
- "et_EE",
9
- "fi_FI",
10
- "fr_XX",
11
- "gu_IN",
12
- "hi_IN",
13
- "it_IT",
14
- "ja_XX",
15
- "kk_KZ",
16
- "ko_KR",
17
- "lt_LT",
18
- "lv_LV",
19
- "my_MM",
20
- "ne_NP",
21
- "nl_XX",
22
- "ro_RO",
23
- "ru_RU",
24
- "si_LK",
25
- "tr_TR",
26
- "vi_VN",
27
- "zh_CN",
28
- "af_ZA",
29
- "az_AZ",
30
- "bn_IN",
31
- "fa_IR",
32
- "he_IL",
33
- "hr_HR",
34
- "id_ID",
35
- "ka_GE",
36
- "km_KH",
37
- "mk_MK",
38
- "ml_IN",
39
- "mn_MN",
40
- "mr_IN",
41
- "pl_PL",
42
- "ps_AF",
43
- "pt_XX",
44
- "sv_SE",
45
- "sw_KE",
46
- "ta_IN",
47
- "te_IN",
48
- "th_TH",
49
- "tl_XX",
50
- "uk_UA",
51
- "ur_PK",
52
- "xh_ZA",
53
- "gl_ES",
54
- "sl_SI"
55
  ],
56
  "bos_token": "<s>",
57
  "cls_token": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<northern>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<central>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<southern>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ],
25
  "bos_token": "<s>",
26
  "cls_token": "<s>",
tokenizer_config.json CHANGED
@@ -455,61 +455,36 @@
455
  "rstrip": false,
456
  "single_word": false,
457
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  }
459
  },
460
  "additional_special_tokens": [
461
- "ar_AR",
462
- "cs_CZ",
463
- "de_DE",
464
- "en_XX",
465
- "es_XX",
466
- "et_EE",
467
- "fi_FI",
468
- "fr_XX",
469
- "gu_IN",
470
- "hi_IN",
471
- "it_IT",
472
- "ja_XX",
473
- "kk_KZ",
474
- "ko_KR",
475
- "lt_LT",
476
- "lv_LV",
477
- "my_MM",
478
- "ne_NP",
479
- "nl_XX",
480
- "ro_RO",
481
- "ru_RU",
482
- "si_LK",
483
- "tr_TR",
484
- "vi_VN",
485
- "zh_CN",
486
- "af_ZA",
487
- "az_AZ",
488
- "bn_IN",
489
- "fa_IR",
490
- "he_IL",
491
- "hr_HR",
492
- "id_ID",
493
- "ka_GE",
494
- "km_KH",
495
- "mk_MK",
496
- "ml_IN",
497
- "mn_MN",
498
- "mr_IN",
499
- "pl_PL",
500
- "ps_AF",
501
- "pt_XX",
502
- "sv_SE",
503
- "sw_KE",
504
- "ta_IN",
505
- "te_IN",
506
- "th_TH",
507
- "tl_XX",
508
- "uk_UA",
509
- "ur_PK",
510
- "xh_ZA",
511
- "gl_ES",
512
- "sl_SI"
513
  ],
514
  "bos_token": "<s>",
515
  "clean_up_tokenization_spaces": false,
 
455
  "rstrip": false,
456
  "single_word": false,
457
  "special": true
458
+ },
459
+ "250054": {
460
+ "content": "<northern>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "250055": {
468
+ "content": "<central>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "250056": {
476
+ "content": "<southern>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
  }
483
  },
484
  "additional_special_tokens": [
485
+ "<northern>",
486
+ "<central>",
487
+ "<southern>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  ],
489
  "bos_token": "<s>",
490
  "clean_up_tokenization_spaces": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe9142a39cbcaa92f63dfe1d8be6465a546de4013aea437f8b05b9ca75c263cc
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fcf0880a068fe9e26ebe93aa8abde4a6c35820f6d6656ce30defd741c516c76
3
  size 5969