win2win
/

3-epochs-classifier

Text Classification

Model card Files Files and versions

win2win committed on Apr 7, 2025

Commit

efc1e48

·

verified ·

1 Parent(s): 1ae9149

Create tokenizer_config.json

Files changed (1) hide show

tokenizer_config.json +29 -0

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "added_tokens_decoder": {
+    "0": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+    "100": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+    "101": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+    "102": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+    "103": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]",
+  "add_token_type_ids": false
+}