smiles-tokenizer-package / test_tokenizer.py
suku9's picture
Upload SMILES tokenizer package
4b0a05b verified
raw
history blame contribute delete
793 Bytes
"""Test script for SMILES tokenizer."""
from smiles_tokenizer import SmilesTokenizer
from smiles_tokenizer.utils import prepare_for_gpt2
def main():
    """Smoke-test the SMILES tokenizer and its GPT-2 integration on aspirin.

    Walks the full pipeline: tokenize -> encode -> wrap for GPT-2 ->
    forward pass, printing each intermediate result.
    """
    smi_tokenizer = SmilesTokenizer()
    # Aspirin (acetylsalicylic acid)
    molecule = "CC(=O)OC1=CC=CC=C1C(=O)O"
    print(f"Tokenizing SMILES: {molecule}")

    # tokenize()/encode() take a batch of SMILES strings; unwrap the
    # single result for this one-molecule demo.
    token_list = smi_tokenizer.tokenize([molecule])[0]
    print(f"Tokens: {token_list}")
    ids = smi_tokenizer.encode([molecule])[0]
    print(f"Encoded: {ids}")

    print("Testing with GPT-2...")
    # prepare_for_gpt2 returns a (model, HF-tokenizer-wrapper) pair —
    # presumably a GPT-2 resized to this vocabulary; confirm in utils.
    gpt2_model, hf_tokenizer = prepare_for_gpt2(smi_tokenizer)
    model_inputs = hf_tokenizer(molecule, return_tensors="pt")
    print(f"Model inputs: {model_inputs}")
    model_outputs = gpt2_model(**model_inputs)
    print(f"Model output shape: {model_outputs.logits.shape}")
    print("Test completed successfully!")
# Run the demo only when executed directly, not when imported.
if __name__ == "__main__":
    main()