Spaces:

m1b2lover
/

llamapy2

Paused

File size: 620 Bytes

eb82c6a
 
 
 
1bcb27e
eb82c6a

from llama_cpp import Llama

# Script flow: load a GGUF model by repo id and filename pattern, run a single
# Q/A-style completion against it, and print the raw completion result.

# Extra keyword arguments forwarded verbatim to Llama.from_pretrained.
# NOTE(review): n_gpu_layers=-1 presumably requests full GPU offload —
# confirm against the llama-cpp-python documentation.
some_kwargs = dict(n_gpu_layers=-1, verbose=True)

_MODEL_REPO = "Qwen/Qwen2-0.5B-Instruct-GGUF"
_MODEL_FILE_PATTERN = "*q8_0.gguf"

llm = Llama.from_pretrained(
    repo_id=_MODEL_REPO,
    filename=_MODEL_FILE_PATTERN,
    **some_kwargs,
)

_PROMPT = "Q: Name the planets in the solar system? A: "

_completion_options = dict(
    # Generate up to 32 tokens; None would generate up to the end of the
    # context window.
    max_tokens=32,
    # Stop just before the model would start a new question (or a new line).
    stop=["Q:", "\n"],
    # Echo the prompt back as part of the output.
    echo=True,
)

# Calling the model object generates a completion (create_completion can be
# called instead).
output = llm(_PROMPT, **_completion_options)

print(output)