"""Smoke-test: load the LoopLlama checkpoint and run one forward pass.

Loads the tokenizer and causal-LM weights from a local checkpoint directory,
runs a single short prompt through the model, and prints the logits shape to
confirm the model is wired up end to end.
"""
from transformers import AutoTokenizer, AutoModelForCausalLM

# Local checkpoint directory; keep tokenizer and model pointed at the same path.
MODEL_DIR = "/projects/llama-cpt/models/loopllama"

# NOTE(review): trust_remote_code=True executes Python shipped inside the
# checkpoint directory — acceptable only because this is a trusted local path.
tok = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
m = AutoModelForCausalLM.from_pretrained(MODEL_DIR, trust_remote_code=True)

# Single forward pass on a trivial prompt; tok(...) returns a dict of tensors
# (input_ids, attention_mask) that unpacks directly into the model call.
out = m(**tok("hello", return_tensors="pt"))
print(out.logits.shape)  # [1, seq_len, vocab_size]