| # third-party imports | |
| from transformers import AutoTokenizer | |
| # constants import | |
| # tokenizer definition | |
# Loaded tokenizers keyed by checkpoint name, so each checkpoint is read
# from disk (or downloaded) at most once per process.
_TOKENIZER_CACHE = {}


def _load_tokenizer(model_name: str):
    """Return the tokenizer for *model_name*, loading it on first use only."""
    tokenizer = _TOKENIZER_CACHE.get(model_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = tokenizer
    return tokenizer


def text_tokenizer(
    text: str,
    max_length: int = 512,
    *,
    model_name: str = "bert-base-uncased",
):
    """Tokenize *text* with a pretrained Hugging Face tokenizer.

    The tokenizer is looked up through a module-level cache, so repeated
    calls do not reload the vocabulary and configuration each time.

    Args:
        text: The input passed to the tokenizer.
        max_length: Length the output is padded to and truncated at.
        model_name: Checkpoint name handed to
            ``AutoTokenizer.from_pretrained``. Defaults to
            ``"bert-base-uncased"``, the checkpoint this function has
            always used.

    Returns:
        Whatever the tokenizer call returns for these arguments; the call
        requests ``return_tensors="pt"`` (PyTorch tensors, per the
        transformers documentation).
    """
    tokenizer = _load_tokenizer(model_name)
    return tokenizer(
        text,
        max_length=max_length,
        padding="max_length",  # always pad up to max_length
        truncation=True,  # cut inputs longer than max_length
        return_tensors="pt",
    )