Trouble getting function calling to work
I'm in the process of writing a wrapper/parser script that can connect to this model via an API call and then convert the prompt format between Mistral and the standard, OpenAI Completion object syntax. So far, I've been using the server that is created by llama.cpp to interact with it and I want to use it with the MemGPT framework, which requires the function calling capability. Do you have a recommended framework to take advantage of the function calling feature (ideally one that can work with OpenAI API compatible completion objects)?
Here's some additional information. Here's the draft of the wrapper script that I created. I am able to query the LLM and get a response, but it doesn't seem to be recognizing the function calls I'm sending it. Here's the code for the script:
import requests
import json
import re # Regular expression library
from abc import ABC, abstractmethod
import logging
# Root logger writes to a file; this module's logger also mirrors to the console.
logging.basicConfig(
    filename='mistral_wrapper.log',
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(name)s %(message)s',
)
logger = logging.getLogger(__name__)

# Console mirror: a DEBUG-level stream handler with its own format.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
console_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)
logger.addHandler(console_handler)
# Base class from wrapper_base.py included below
class LLMChatCompletionWrapper(ABC):
    """Adapter contract for prompt-format wrappers.

    Implementations translate an OpenAI-style ChatCompletion request
    (messages + functions) into a raw prompt string for the model, and
    parse the raw model output back into a ChatCompletion-shaped response.
    """

    @abstractmethod
    def chat_completion_to_prompt(self, messages, functions):
        """Render the message list and function metadata into one prompt string."""
        ...

    @abstractmethod
    def output_to_chat_completion_response(self, raw_llm_output):
        """Parse raw LLM output into a ChatCompletion-style response."""
        ...
#---------------------------------------------------------------
# Wrapper Class Implementation below
class OllamaMistral7BWrapper(LLMChatCompletionWrapper):
    """Wrapper for a function-calling Mistral-7B model served over an
    llama.cpp-style HTTP API (POST {api_url}/completion).

    Converts OpenAI-style ChatCompletion messages/functions into the
    Mistral [INST]/<FUNCTIONS> prompt format and parses the raw model
    output back into a ChatCompletion-shaped response dict.
    """

    def __init__(self, api_url):
        # Base URL of the llama.cpp server, e.g. 'http://localhost:8080'.
        self.api_url = api_url

    def chat_completion_to_prompt(self, messages, functions):
        """Render messages + function metadata into a single prompt string.

        NOTE: only the last 'system' and last 'user' message are kept —
        earlier ones are overwritten, so multi-turn history is not
        supported yet.
        """
        # Markers expected by the function-calling fine-tune.
        B_INST, E_INST = "[INST]", "[/INST]"
        B_FUNC, E_FUNC = "<FUNCTIONS>", "</FUNCTIONS>\n\n"
        B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
        # Extract the system and user prompts from the messages.
        system_prompt = ''
        user_prompt = ''
        for message in messages:
            if message['role'] == 'system':
                system_prompt = message['content']
            elif message['role'] == 'user':
                user_prompt = message['content']
        # One single-line JSON object per function, blank-line separated.
        function_list = '\n\n'.join(json.dumps(func) for func in functions)
        # Format the prompt template based on whether there's a system prompt or not.
        if system_prompt:
            prompt = f"{B_FUNC}{function_list}{E_FUNC}{B_INST} {B_SYS}{system_prompt.strip()}{E_SYS} {user_prompt.strip()} {E_INST}\n\n"
        else:
            prompt = f"{B_FUNC}{function_list}{E_FUNC}{B_INST} {user_prompt.strip()} {E_INST}\n\n"
        return prompt

    def output_to_chat_completion_response(self, raw_llm_output):
        """Turn the raw llama.cpp JSON response into a ChatCompletion-style dict.

        Returns {'message': {'role', 'content', 'function_call'}} where
        'function_call' is the first {"function": ..., "arguments": {...}}
        JSON object found in the model text, or None when absent or
        unparsable.
        """
        # call_llama returns None on transport/HTTP failure; surface that as
        # an empty assistant message instead of crashing on the subscript.
        # (The original also had an orphan `except` with no matching `try`,
        # which is a SyntaxError — the parse is now properly guarded.)
        if raw_llm_output is None:
            return {'message': {'role': 'assistant', 'content': '', 'function_call': None}}
        content = raw_llm_output['content']
        function_call = None
        # Look for a JSON object in 'content' that contains the function call request.
        function_call_match = re.search(r'\{\s*"function":\s*".+?",\s*"arguments":\s*\{.+?\}\s*\}', content)
        if function_call_match:
            try:
                function_call = json.loads(function_call_match.group(0))
            except json.JSONDecodeError:
                # Looked like a call but wasn't valid JSON — report and move on.
                logger.error(f"An error occurred while extracting the result: invalid JSON in {function_call_match.group(0)}")
                function_call = None
        return {
            'message': {
                'role': 'assistant',
                'content': content,
                'function_call': function_call,
            }
        }

    def call_llama(self, model, prompt, timeout=None):
        """POST the prompt to {api_url}/completion and return the decoded
        JSON body, or None on HTTP or JSON-decoding failure.

        `timeout` (seconds) is forwarded to requests; the default None
        preserves the original unbounded-wait behavior.
        """
        headers = {
            'Content-Type': 'application/json',
        }
        data = json.dumps({'model': model, 'prompt': prompt, 'stream': False})
        response = requests.post(f'{self.api_url}/completion', headers=headers,
                                 data=data, timeout=timeout)
        try:
            if response.status_code == 200:
                return response.json()
            logger.error(f'API call failed with status code: {response.status_code}, Response: {response.text}')
        except json.JSONDecodeError:
            logger.error(f'Failed to decode JSON: {response.text}')
        # Explicit None on any failure path so callers can test for it.
        return None

    def chat_completion(self, messages, functions):
        """End-to-end: format the prompt, call the server, parse the response."""
        model = 'mistral'  # TODO: Replace with the actual model name for Mistral-7B
        formatted_request = self.chat_completion_to_prompt(messages, functions)
        logger.debug(f'Formatted request: {formatted_request}')  # inspect before it reaches the model
        raw_llm_output = self.call_llama(model, formatted_request)
        logger.debug('Raw output received')
        logger.debug(f'Raw output: {raw_llm_output}')
        return self.output_to_chat_completion_response(raw_llm_output)
#---------------------------------------------------------------
# Example program to test Wrapper class
api_url = 'http://172.26.165.179:8080'  # Replace with the actual Ollama API base URL
ollama_wrapper = OllamaMistral7BWrapper(api_url)

# Single function schema the model may choose to call.
functions = [
    {
        "function": "foo",
        "description": "Calculates the foo of two numbers",
        "arguments": [
            {
                "name": "number1",
                "type": "number",
                "description": "First number to calculate the foo of",
            },
            {
                "name": "number2",
                "type": "number",
                "description": "Second number to calculate the foo of",
            },
        ],
    },
]

# A minimal system + user exchange that should trigger the function call.
messages = [
    {"role": "system", "content": 'You are a helpful assistant'},
    {"role": "user", "content": 'Please calculate the foo of 4 and 5 and return the result'},
]

response = ollama_wrapper.chat_completion(messages, functions)
logger.debug(f'Final chat completion response: {response}')
And here's the log output from the script:
2023-10-29 07:50:40,348 - __main__ - DEBUG - Formatted request: <FUNCTIONS>{"function": "foo", "description": "Calculates the foo of two numbers", "arguments": [{"name": "number1", "type": "number", "description": "First number to calculate the foo of"}, {"name": "number2", "type": "number", "description": "Second number to calculate the foo of"}]}</FUNCTIONS>
[INST] <<SYS>>
You are a helpful assistant
<</SYS>>
Please calculate the foo of 4 and 5 and return the result [/INST]
2023-10-29 07:50:52,208 - __main__ - DEBUG - Raw output received
2023-10-29 07:50:52,208 - __main__ - DEBUG - Raw output: {'content': '{% set num1 = 4 %}\n{% set num2 = 5 %}\n{% set result = foo(num1, num2) %}\nThe foo of {{ num1 }} and {{ num2 }} is {{ result }}.', 'generation_settings': {'frequency_penalty': 0.0, 'grammar': '', 'ignore_eos': False, 'logit_bias': [], 'mirostat': 0, 'mirostat_eta': 0.10000000149011612, 'mirostat_tau': 5.0, 'model': '/home/jowens/models/7B/Mistral-7B-Instruct-v0.1-function-calling-v2/Mistral-7B-Instruct-v0.1-function-calling-v2.Q4_K.gguf', 'n_ctx': 8000, 'n_keep': 0, 'n_predict': -1, 'n_probs': 0, 'penalize_nl': True, 'presence_penalty': 0.0, 'repeat_last_n': 64, 'repeat_penalty': 1.100000023841858, 'seed': 4294967295, 'stop': [], 'stream': False, 'temp': 0.800000011920929, 'tfs_z': 1.0, 'top_k': 40, 'top_p': 0.949999988079071, 'typical_p': 1.0}, 'model': '/home/jowens/models/7B/Mistral-7B-Instruct-v0.1-function-calling-v2/Mistral-7B-Instruct-v0.1-function-calling-v2.Q4_K.gguf', 'prompt': '<FUNCTIONS>{"function": "foo", "description": "Calculates the foo of two numbers", "arguments": [{"name": "number1", "type": "number", "description": "First number to calculate the foo of"}, {"name": "number2", "type": "number", "description": "Second number to calculate the foo of"}]}</FUNCTIONS>\n\n[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n Please calculate the foo of 4 and 5 and return the result [/INST]\n\n', 'slot_id': 0, 'stop': True, 'stopped_eos': True, 'stopped_limit': False, 'stopped_word': False, 'stopping_word': '', 'timings': {'predicted_ms': 6395.44, 'predicted_n': 55, 'predicted_per_second': 8.59987741265652, 'predicted_per_token_ms': 116.28072727272726, 'prompt_ms': 5447.577, 'prompt_n': 124, 'prompt_per_second': 22.76241345464231, 'prompt_per_token_ms': 43.93207258064516}, 'tokens_cached': 179, 'tokens_evaluated': 124, 'tokens_predicted': 55, 'truncated': False}
2023-10-29 07:50:52,210 - __main__ - DEBUG - Final chat completion response: {'message': {'role': 'assistant', 'content': '{% set num1 = 4 %}\n{% set num2 = 5 %}\n{% set result = foo(num1, num2) %}\nThe foo of {{ num1 }} and {{ num2 }} is {{ result }}.', 'function_call': None}}
I'm assuming that I'm probably formatting something wrong unless the issue is with how I'm providing the information to the API server (using llama.cpp). Any help greatly appreciated!
Howdy!
I haven't built a wrapper myself for a server, and I haven't done much with llama cpp on function calling other than this video, and I know that was working correctly at the time.
After installing llama cpp, have you tried just running the server and sending some requests from the command line (perhaps with the help of a short sh script because inputting functions via command line can be tricky)?
I'd probably start with that and confirm that what you send to the server is formatted as llama cpp expects and also gives you back what you expect.
For initial debugging, it's probably simpler to also leave out the system message as well.
Other small things:
- you have 'model = 'mistral'' and I'm not sure that's calling the right model
- worth doing some logging in call_llama to ensure that what actually goes to the server is what you think it is.
@RonanMcGovern Thank you for providing the feedback! After a bit more playing around, I was finally able to get it to work! For anyone interested, here's a simple test script I created based on the function calling example to show how it works:
import requests
import json

# Define the roles and markers
B_INST, E_INST = "[INST]", "[/INST]"
B_FUNC, E_FUNC = "<FUNCTIONS>", "</FUNCTIONS>\n\n"

# Define the function metadata
function_metadata = {
    "function": "search_bing",
    "description": "Search the web for content on Bing. This allows users to search online/the internet/the web for content.",
    "arguments": [
        {
            "name": "query",
            "type": "string",
            "description": "The search query string"
        }
    ]
}

# Define the user prompt
user_prompt = 'Search for the latest news on AI.'

# Format the function list and prompt
function_list = json.dumps(function_metadata, indent=4)
prompt = f"{B_FUNC}{function_list.strip()}{E_FUNC}{B_INST} {user_prompt.strip()} {E_INST}\n\n"

# Define the API endpoint.
# FIX: the original read "http:/localhost:8080/completion" (single slash),
# which requests rejects as a malformed URL scheme.
url = "http://localhost:8080/completion"

# Send the POST request to the API server
response = requests.post(url, json={"prompt": prompt})

# Print the response
print(response.json())
Thanks! I'll add this to the repo for reference for people.