```python
from llama_cpp import Llama

model_kwargs = {
    "n_gpu_layers": -1,  # Offload all layers to the GPU
    "verbose": True,
}

# Download the GGUF file from the Hugging Face Hub and load it
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
    filename="*q8_0.gguf",
    **model_kwargs,
)

# Generate a completion; can also call create_completion
output = llm(
    "Q: Name the planets in the solar system? A: ",  # Prompt
    max_tokens=32,      # Generate up to 32 tokens; None generates to the end of the context window
    stop=["Q:", "\n"],  # Stop just before the model would generate a new question
    echo=True,          # Echo the prompt back in the output
)
print(output)
```
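The call above returns an OpenAI-style completion dict rather than a plain string, so printing `output` dumps the whole response object. Below is a minimal sketch of pulling out just the generated text, plus a streaming variant; it assumes the standard llama-cpp-python response shape (`choices[0]["text"]`) and `stream=True` behavior, and reuses the `llm` object from the snippet above.

```python
# The generated text lives in output["choices"][0]["text"]
# (the prompt is included here because echo=True was set above).
text = output["choices"][0]["text"]
print(text)

# Streaming variant: with stream=True the call yields chunks instead of
# returning a single dict, so tokens can be printed as they arrive.
for chunk in llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
    stream=True,
):
    print(chunk["choices"][0]["text"], end="", flush=True)
print()
```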