"""Usage example for the structRFM pretrained RNA language model.

Loads the model and tokenizer from the Hugging Face Hub and extracts
sequence embeddings, first for a single sequence and then for a padded
batch. The expected console output is recorded in the trailing string.
"""
from transformers import AutoModel, AutoTokenizer

model_path = 'heqin-zhu/structRFM'
# model_path = os.getenv('structRFM_checkpoint')  # alternative: local checkpoint (would require `import os`)
model = AutoModel.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# --- single sequence ---
seq = 'GUCCCAACUCUUGCGGGGAGGGAU'
inputs = tokenizer(seq, return_tensors="pt")
outputs = model(**inputs)
print('>>> single seq, length:', len(seq))
for k, v in outputs.items():
    print(k, v.shape)
print(outputs.last_hidden_state.shape)

# --- batch mode ---
# NOTE(review): 'AUGUAGUTCUN' mixes 'T' and 'N' into an otherwise RNA (ACGU)
# alphabet — presumably intended to exercise unknown/non-standard tokens;
# confirm against the model card / tokenizer vocabulary.
seqs = ["GUCCCAA", 'AGUGUUG', 'AUGUAGUTCUN']
inputs = tokenizer(
    seqs,
    add_special_tokens=True,
    max_length=514,
    padding='max_length',
    truncation=True,
    return_tensors='pt',
)
outputs = model(**inputs)
# note that the output sequential features are padded to max-length
print('>>> batch seqs, batch:', len(seqs))
for k, v in outputs.items():
    print(k, v.shape)

'''
>>> single seq, length: 24
last_hidden_state torch.Size([1, 24, 768])
pooler_output torch.Size([1, 768])
torch.Size([1, 24, 768])
>>> batch seqs, batch: 3
last_hidden_state torch.Size([3, 514, 768])
pooler_output torch.Size([3, 768])
'''