latishab committed on
Commit
2ae8c32
·
verified ·
1 Parent(s): dddd6ef

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +7 -4
  2. tokenizer_config.json +4 -0
tokenizer.json CHANGED
@@ -1,10 +1,13 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": {
5
- "strategy": {
6
- "Fixed": 256
7
- },
8
  "direction": "Right",
9
  "pad_to_multiple_of": null,
10
  "pad_id": 2,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": {
10
+ "strategy": "BatchLongest",
 
 
11
  "direction": "Right",
12
  "pad_to_multiple_of": null,
13
  "pad_id": 2,
tokenizer_config.json CHANGED
@@ -147,8 +147,12 @@
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
  "extra_special_tokens": {},
 
150
  "model_max_length": 8192,
 
151
  "pad_token": "<|im_end|>",
 
 
152
  "tokenizer_class": "GPT2Tokenizer",
153
  "unk_token": "<|endoftext|>",
154
  "vocab_size": 49152
 
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
  "extra_special_tokens": {},
150
+ "max_length": 256,
151
  "model_max_length": 8192,
152
+ "pad_to_multiple_of": null,
153
  "pad_token": "<|im_end|>",
154
+ "pad_token_type_id": 0,
155
+ "padding_side": "right",
156
  "tokenizer_class": "GPT2Tokenizer",
157
  "unk_token": "<|endoftext|>",
158
  "vocab_size": 49152