"""Test script for SMILES tokenizer.""" from smiles_tokenizer import SmilesTokenizer from smiles_tokenizer.utils import prepare_for_gpt2 def main(): tokenizer = SmilesTokenizer() smiles = "CC(=O)OC1=CC=CC=C1C(=O)O" # Aspirin print(f"Tokenizing SMILES: {smiles}") tokens = tokenizer.tokenize([smiles])[0] print(f"Tokens: {tokens}") encoded = tokenizer.encode([smiles])[0] print(f"Encoded: {encoded}") print("Testing with GPT-2...") model, tokenizer_wrapper = prepare_for_gpt2(tokenizer) inputs = tokenizer_wrapper(smiles, return_tensors="pt") print(f"Model inputs: {inputs}") outputs = model(**inputs) print(f"Model output shape: {outputs.logits.shape}") print("Test completed successfully!") if __name__ == "__main__": main()