# Load the checkpoint and run a quick qualitative sampling check on a few prompts.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

path = "spark_v4_fp16_final"
tokenizer = AutoTokenizer.from_pretrained(path)
# torch_dtype=torch.float16 is assumed from the checkpoint name; drop it to load in fp32.
model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16).to("cuda")

prompts = [
    "Artificial Intelligence is",
    "The main concept of physics is",
    "In the year 1969, ",
]

for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Top-k sampling: up to 200 new tokens, k=25, temperature 0.8.
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        top_k=25,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
    )
    print(f"PROMPT: {prompt}")
    print(f"OUTPUT: {tokenizer.decode(outputs[0], skip_special_tokens=True)}\n{'-' * 40}")