MazenSoft committed on
Commit
c988af0
·
verified ·
1 Parent(s): 46909f3

First model upload

Browse files
Files changed (3) hide show
  1. config.json +1 -1
  2. tokenizer.json +2 -11
  3. tokenizer_config.json +7 -0
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "aubmindlab/bert-base-arabertv02",
3
  "architectures": [
4
  "BertForTokenClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "MazenSoft/arabert-ner-model",
3
  "architectures": [
4
  "BertForTokenClassification"
5
  ],
tokenizer.json CHANGED
@@ -2,20 +2,11 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 128,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 128
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 0,
16
- "pad_type_id": 0,
17
- "pad_token": "[PAD]"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
+ "padding": null,
 
 
 
 
 
 
 
 
 
10
  "added_tokens": [
11
  {
12
  "id": 0,
tokenizer_config.json CHANGED
@@ -72,16 +72,23 @@
72
  "extra_special_tokens": {},
73
  "mask_token": "[MASK]",
74
  "max_len": 512,
 
75
  "model_max_length": 512,
76
  "never_split": [
77
  "[بريد]",
78
  "[مستخدم]",
79
  "[رابط]"
80
  ],
 
81
  "pad_token": "[PAD]",
 
 
82
  "sep_token": "[SEP]",
 
83
  "strip_accents": null,
84
  "tokenize_chinese_chars": true,
85
  "tokenizer_class": "BertTokenizer",
 
 
86
  "unk_token": "[UNK]"
87
  }
 
72
  "extra_special_tokens": {},
73
  "mask_token": "[MASK]",
74
  "max_len": 512,
75
+ "max_length": 128,
76
  "model_max_length": 512,
77
  "never_split": [
78
  "[بريد]",
79
  "[مستخدم]",
80
  "[رابط]"
81
  ],
82
+ "pad_to_multiple_of": null,
83
  "pad_token": "[PAD]",
84
+ "pad_token_type_id": 0,
85
+ "padding_side": "right",
86
  "sep_token": "[SEP]",
87
+ "stride": 0,
88
  "strip_accents": null,
89
  "tokenize_chinese_chars": true,
90
  "tokenizer_class": "BertTokenizer",
91
+ "truncation_side": "right",
92
+ "truncation_strategy": "longest_first",
93
  "unk_token": "[UNK]"
94
  }