theekshana
/

sinhala_albert

Text Classification

Generated from Trainer

Model card · Files and versions

theekshana committed on Jun 8, 2024

Commit

13484fe

·

verified ·

1 Parent(s): c150e94

Upload tokenizer

Files changed (2) hide show

README.md +1 -1
tokenizer_config.json +8 -1

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
 license: apache-2.0
-base_model: albert-base-v2
 tags:
 - classification
 - sentiment
 - sinhala
 - news data
 - generated_from_trainer
 model-index:
 - name: sinhala_albert
   results: []

 ---
 license: apache-2.0
 tags:
 - classification
 - sentiment
 - sinhala
 - news data
 - generated_from_trainer
+base_model: albert-base-v2
 model-index:
 - name: sinhala_albert
   results: []

tokenizer_config.json CHANGED Viewed

@@ -44,9 +44,16 @@
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "mask_token": "[MASK]",
-  "model_max_length": 64,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "[UNK]"
 }

   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "mask_token": "[MASK]",
+  "max_length": 64,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }