File size: 2,206 Bytes
8371fea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
"""
Example usage script for LOL-EVE model.
This script demonstrates how to load and use the LOL-EVE model for genomic sequence analysis.
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def _run_example(title, sequence, tokenizer, model):
    """Tokenize one input string, run a forward pass, and print the logits shape.

    Args:
        title: Heading printed (after a blank line) above the example output.
        sequence: Raw input string handed to the tokenizer unchanged.
        tokenizer: Object called as ``tokenizer(sequence, return_tensors="pt")``;
            its result is unpacked as keyword arguments into ``model``.
        model: Object called with those keyword arguments; the returned value
            must expose a ``logits`` attribute with at least two dimensions.
    """
    print(f"\n{title}")
    print("-" * 30)
    print(f"Input: {sequence}")

    inputs = tokenizer(sequence, return_tensors="pt")
    # Inference only: skip gradient tracking for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    logits_shape = outputs.logits.shape
    print(f"Output shape: {logits_shape}")
    # Index 1 of the logits shape is reported as the token count.
    print(f"Sequence length: {logits_shape[1]} tokens")


def main():
    """Load the LOL-EVE tokenizer and model, then run three example inputs.

    Both are fetched with ``from_pretrained('Marks-lab/LOL-EVE')``; the model
    is loaded with ``trust_remote_code=True``. For each example the input
    string, the logits shape, and the token count are printed to stdout.
    """
    print("🧬 LOL-EVE Example Usage")
    print("=" * 40)

    # Load model and tokenizer
    print("Loading model and tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained('Marks-lab/LOL-EVE')
    model = AutoModelForCausalLM.from_pretrained('Marks-lab/LOL-EVE', trust_remote_code=True)
    print("✅ Model loaded successfully!")

    # (heading, input string) pairs, run in order through the same pipeline.
    examples = (
        # Example 1: Basic DNA sequence
        (
            "1. Basic DNA Sequence Analysis",
            "[MASK] [MASK] [MASK] [SOS]ATGCTAGCTAGCTAGCTAGCTA[EOS]",
        ),
        # Example 2: Control code sequence (recommended)
        (
            "2. Control Code Sequence Analysis",
            "brca1 human primate [SOS] ATGCTAGCTAGCTAGCTAGCTA [EOS]",
        ),
        # Example 3: Different gene
        (
            "3. Different Gene Analysis",
            "tp53 human primate [SOS] GATCGATCGATCGATCGATCGA [EOS]",
        ),
    )
    for title, sequence in examples:
        _run_example(title, sequence, tokenizer, model)

    print("\n" + "=" * 40)
    print("🎉 All examples completed successfully!")
    print("The model is ready for your genomic analysis tasks.")

# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()