SVECTOR-OFFICIAL committed on
Commit
ffdc618
·
verified ·
1 Parent(s): 1a89d4b

Delete tessar_tokenizer_example.py

Browse files
Files changed (1) hide show
  1. tessar_tokenizer_example.py +0 -22
tessar_tokenizer_example.py DELETED
@@ -1,22 +0,0 @@
1
- # Standard usage with default settings
2
- tokenizer = TessarTokenizer.from_pretrained("SVECTOR-CORPORATION/Tessar-largest")
3
-
4
- # Tokenize a single piece of text
5
- text = "Hello, how are you doing today?"
6
- encoded = tokenizer(text, return_tensors="pt")
7
-
8
- # Batch tokenization of multiple texts
9
- texts = [
10
- "Hello, world!",
11
- "This is a test sentence.",
12
- "Tokenization is an important NLP task."
13
- ]
14
- batch_encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
15
-
16
- # Custom tokenizer with specific settings
17
- custom_tokenizer = TessarTokenizer(
18
- do_lower_case=True,
19
- max_cell_length=20,
20
- unk_token="[UNK]",
21
- pad_token="[PAD]"
22
- )