# Question: why does the model loop forever (repeating the same tool call)?
from llama_cpp import Llama

# Path to the quantized FunctionGemma checkpoint (GGUF, q8_0).
model_path = "/home/zhou/pro_xing/small_llm/checkpoints/functiongemma-270m-it-q8_0_ggml-org.gguf"

# Load the model with a 512-token context window.
llm = Llama(model_path=model_path, n_ctx=512)
# OpenAI-style tool description for a single function the model may call.
# NOTE: despite the plural name, this is ONE tool dict; it gets wrapped in a
# list ([tools]) at the call site.
tools = {
    "type": "function",
    "function": {
        "name": "get_current_temperature",
        "description": "Gets the current temperature for a given location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city name, e.g. San Francisco",
                }
            },
            "required": ["location"],
        },
    },
}
# Let the model choose which function to call.
# Ask the model to answer via a tool call.
#
# Why the original run looped: the 270M model kept emitting
# "call:get_current_temperature{location:London}" back-to-back until the
# 512-token context filled (finish_reason == "length") — the chat template
# never produced an end-of-call stop token, so sampling never terminated.
#
# Fixes applied:
#   * tool_choice forces llama-cpp-python to constrain decoding with a JSON
#     grammar for exactly one well-formed call to the named function, so the
#     output stops after a single call instead of free-running. (With only
#     one tool available, forcing it loses nothing.)
#   * max_tokens bounds generation so a misbehaving template can never
#     consume the whole context window.
response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": "Get the temperature in London?",
        }
    ],
    tools=[tools],
    tool_choice={
        "type": "function",
        "function": {"name": "get_current_temperature"},
    },
    max_tokens=128,
)
print(response)
# Observed output before the fix — the assistant repeats the same tool call until
# the 512-token context is exhausted (note finish_reason: 'length'):
# {'id': 'chatcmpl-028f437e-eb19-4d26-91ba-0df019169718', 'object': 'chat.completion', 'created': 1766718193, 'model': '/home/zhou/pro_xing/small_llm/checkpoints/functiongemma-270m-it-q8_0_ggml-org.gguf', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'call:get_current_temperature{location:London}call:get_current_temperature{location:London}...'}, 'logprobs': None, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 83, 'completion_tokens': 429, 'total_tokens': 512}}