from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Identifier of the checkpoint; both the tokenizer and the model are loaded from it.
model_name = "AxiomicLabs/GPT-S-5M"

# NOTE(review): trust_remote_code=True lets the library run Python code shipped
# with the model repository, so only use it with a repository you trust.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

prompt = "The future of AI is"

# Tokenize into PyTorch tensors and move them to the device the model reports.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling configuration, gathered in one place and unpacked into generate().
generation_kwargs = {
    "max_new_tokens": 120,
    "do_sample": True,
    "temperature": 0.8,
    "top_p": 0.95,
    "repetition_penalty": 1.1,
    "no_repeat_ngram_size": 4,
}

# Generation is inference only, so run it without autograd tracking.
with torch.inference_mode():
    output = model.generate(**inputs, **generation_kwargs)

# Decode the first returned sequence, omitting special tokens, and print it.
text = tokenizer.decode(output[0], skip_special_tokens=True)
print(text)