smiles-tokenizer / gpt2_tokenizer.py
suku9's picture
Upload SMILES tokenizer
0f51b32 verified
"""GPT2 Tokenizer that loads our custom SMILES tokenizer."""
import os
import json
import torch
from transformers import PreTrainedTokenizer
from .smiles_tokenizer import SmilesTokenizer, SmilesVocabulary
class GPT2Tokenizer(PreTrainedTokenizer):
    """GPT2Tokenizer wrapper for our SMILES tokenizer.

    This class exists only to make AutoTokenizer find our tokenizer.
    It builds an ``HFSmilesTokenizer`` from the constructor keyword
    arguments, stores it as ``self.tokenizer``, and forwards calls and
    otherwise-missing attribute lookups to that wrapped instance.
    """

    def __init__(self, **kwargs):
        """Create the wrapped tokenizer, then initialise the base class.

        Args:
            **kwargs: Passed unchanged both to ``HFSmilesTokenizer`` and to
                ``PreTrainedTokenizer.__init__``.
        """
        # Imported at call time rather than at module level
        # (presumably to avoid an import cycle -- TODO confirm).
        from .tokenizer_class import HFSmilesTokenizer

        # Set the delegate before running the base initialiser so that any
        # attribute miss during base initialisation can already be forwarded
        # by ``__getattr__``.
        self.tokenizer = HFSmilesTokenizer(**kwargs)
        super().__init__(**kwargs)

    def __getattr__(self, name):
        """Forward attribute lookups that fail on the wrapper to the delegate.

        Python only calls ``__getattr__`` after normal lookup has failed.

        Args:
            name: Name of the attribute that was not found on this object.

        Returns:
            The attribute of the same name on ``self.tokenizer``.

        Raises:
            AttributeError: If the wrapped tokenizer has not been set yet, or
                if it does not provide ``name`` either.
        """
        # Reaching this point with name == "tokenizer" means the delegate is
        # not in the instance dict (e.g. the object was created without
        # running __init__, as copy/pickle do). Evaluating ``self.tokenizer``
        # below would re-enter this method forever, so fail cleanly instead.
        if name == "tokenizer":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self.tokenizer, name)

    def __call__(self, *args, **kwargs):
        """Call the wrapped tokenizer with the given arguments.

        Returns:
            Whatever ``self.tokenizer(*args, **kwargs)`` returns.
        """
        return self.tokenizer(*args, **kwargs)