File size: 1,572 Bytes
ee7017c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python3
"""
Test that existing notebook code still works with updated HF files
"""

from Bio.Seq import Seq
from transformers import GPT2LMHeadModel, GPT2Tokenizer, LogitsProcessor
import torch

# ---------------------------------------------------------------------------
# Smoke test: verify the notebook-style usage pattern still works with the
# updated Hugging Face files. Prints progress as it goes and exits non-zero
# on failure so CI / shell callers can detect a regression.
# ---------------------------------------------------------------------------
print("Testing notebook compatibility...")

try:
    # Import the custom components (they should be downloaded already)
    from tokenizer import CodonTokenizer
    from synonymous_logit_processor import generate_candidate_codons_with_generate
    
    # Load model and tokenizer (notebook style). These module-level globals
    # are what generate_candidate_codons_with_generate reads at call time.
    print("Loading model and tokenizer...")
    model = GPT2LMHeadModel.from_pretrained("naniltx/codonGPT")
    tokenizer = CodonTokenizer()
    print("✓ Model and tokenizer loaded successfully")
    
    # Test the exact notebook usage pattern
    print("\nTesting notebook usage pattern...")
    
    # Example prompt codons from the notebook. (An earlier revision assigned
    # ["GCT", "TGT", "GAT"] first and immediately overwrote it; that dead
    # assignment has been removed.)
    initial_codons = ['ATG', 'GAA', 'CTT', 'GTC']
    print("The initial prompt codons are:", " ".join(initial_codons))
    
    # This should work with global model/tokenizer variables
    generated_codons_generate = generate_candidate_codons_with_generate(initial_codons, temperature=0.7, top_k=5)
    print("Generated with model.generate():", " ".join(generated_codons_generate))
    
    print("\n✅ Notebook compatibility test passed!")
    print("Your existing notebook code will continue to work unchanged.")
    
except Exception as e:
    # Broad catch is intentional here: this is a top-level compatibility
    # probe and ANY failure (import, download, generation) must be reported
    # with a full traceback rather than crash mid-print.
    print(f"\n❌ Compatibility test failed: {e}")
    import traceback
    traceback.print_exc()
    # Propagate failure through the process exit status so CI notices;
    # previously the script exited 0 even after printing a failure.
    raise SystemExit(1)