File size: 620 Bytes
eb82c6a 1bcb27e eb82c6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from llama_cpp import Llama
some_kwargs = {
"n_gpu_layers": -1,
"verbose":True
}
llm = Llama.from_pretrained(
repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
filename="*q8_0.gguf",
**some_kwargs
)
output = llm(
"Q: Name the planets in the solar system? A: ", # Prompt
max_tokens=32, # Generate up to 32 tokens, set to None to generate up to the end of the context window
stop=["Q:", "\n"], # Stop generating just before the model would generate a new question
echo=True # Echo the prompt back in the output
) # Generate a completion, can also call create_completion
print(output) |