from transformers import AutoModel, AutoTokenizer

# Load the pretrained structRFM model and its matching tokenizer from the
# Hugging Face Hub.
model_path = 'heqin-zhu/structRFM'
model = AutoModel.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
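
# Optional addition, not in the original snippet: eval mode disables dropout
# so repeated forward passes return deterministic embeddings.
model.eval()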

# Single-sequence inference: tokenize one RNA sequence and run a forward pass.
seq = 'GUCCCAACUCUUGCGGGGAGGGAU'
inputs = tokenizer(seq, return_tensors='pt')
outputs = model(**inputs)

print('>>> single seq, length:', len(seq))
for k, v in outputs.items():
    print(k, v.shape)
print(outputs.last_hidden_state.shape)
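
# Supplementary sketch, not part of the original example: two common ways to
# collapse the token-level output into one fixed-size vector per sequence;
# which works better is task-dependent.
cls_embedding = outputs.pooler_output                   # [1, 768]
mean_embedding = outputs.last_hidden_state.mean(dim=1)  # [1, 768]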

# Batch inference: pad or truncate every sequence to the same length
# (514 tokens here) so the batch stacks into one tensor. Note the non-RNA
# characters 'T' and 'N' in the last sequence.
seqs = ['GUCCCAA', 'AGUGUUG', 'AUGUAGUTCUN']
inputs = tokenizer(
    seqs,
    add_special_tokens=True,
    max_length=514,
    padding='max_length',
    truncation=True,
    return_tensors='pt',
)
outputs = model(**inputs)

print('>>> batch seqs, batch:', len(seqs))
for k, v in outputs.items():
    print(k, v.shape)
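
# Supplementary sketch, not part of the original example: with padded batches,
# a plain mean over all 514 positions would fold pad embeddings into the
# result, so a common recipe is to mask them out with `attention_mask`.
mask = inputs['attention_mask'].unsqueeze(-1).float()   # [3, 514, 1]
masked_mean = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)  # [3, 768]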

'''
Expected output:

>>> single seq, length: 24
last_hidden_state torch.Size([1, 24, 768])
pooler_output torch.Size([1, 768])
torch.Size([1, 24, 768])
>>> batch seqs, batch: 3
last_hidden_state torch.Size([3, 514, 768])
pooler_output torch.Size([3, 768])
'''