Trouble getting function calling to work
I'm in the process of writing a wrapper/parser script that can connect to this model via an API call and then convert the prompt format between Mistral and the standard, OpenAI Completion object syntax. So far, I've been using the server that is created by llama.cpp to interact with it and I want to use it with the MemGPT framework, which requires the function calling capability. Do you have a recommended framework to take advantage of the function calling feature (ideally one that can work with OpenAI API compatible completion objects)?
Here's some additional information. Here's the draft of the wrapper script that I created. I am able to query the LLM and get a response, but it doesn't seem to be recognizing the function calls I'm sending it. Here's the code for the script:
import requests
import json
import re # Regular expression library
from abc import ABC, abstractmethod
import logging
# Root logger writes to a file; this module's logger also mirrors to the console.
logging.basicConfig(
    filename='mistral_wrapper.log',
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(name)s %(message)s',
)
logger = logging.getLogger(__name__)

# Console mirror: a DEBUG-level stream handler with its own format.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
console_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)
logger.addHandler(console_handler)
# Base class from wrapper_base.py included below
class LLMChatCompletionWrapper(ABC):
    """Adapter contract for prompt-format wrappers.

    Implementations translate an OpenAI-style ChatCompletion request
    (messages + functions) into a raw prompt string for the model, and
    parse the raw model output back into a ChatCompletion-shaped response.
    """

    @abstractmethod
    def chat_completion_to_prompt(self, messages, functions):
        """Render the message list and function metadata into one prompt string."""
        ...

    @abstractmethod
    def output_to_chat_completion_response(self, raw_llm_output):
        """Parse raw LLM output into a ChatCompletion-style response."""
        ...
#---------------------------------------------------------------
# Wrapper Class Implementation below
class OllamaMistral7BWrapper(LLMChatCompletionWrapper):
    """Wrapper for a function-calling Mistral-7B model served over an
    llama.cpp-style HTTP API (POST {api_url}/completion).

    Converts OpenAI-style ChatCompletion messages/functions into the
    Mistral [INST]/<FUNCTIONS> prompt format and parses the raw model
    output back into a ChatCompletion-shaped response dict.
    """

    def __init__(self, api_url):
        # Base URL of the llama.cpp server, e.g. 'http://localhost:8080'.
        self.api_url = api_url

    def chat_completion_to_prompt(self, messages, functions):
        """Render messages + function metadata into a single prompt string.

        NOTE: only the last 'system' and last 'user' message are kept —
        earlier ones are overwritten, so multi-turn history is not
        supported yet.
        """
        # Markers expected by the function-calling fine-tune.
        B_INST, E_INST = "[INST]", "[/INST]"
        B_FUNC, E_FUNC = "<FUNCTIONS>", "</FUNCTIONS>\n\n"
        B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
        # Extract the system and user prompts from the messages.
        system_prompt = ''
        user_prompt = ''
        for message in messages:
            if message['role'] == 'system':
                system_prompt = message['content']
            elif message['role'] == 'user':
                user_prompt = message['content']
        # One single-line JSON object per function, blank-line separated.
        function_list = '\n\n'.join(json.dumps(func) for func in functions)
        # Format the prompt template based on whether there's a system prompt or not.
        if system_prompt:
            prompt = f"{B_FUNC}{function_list}{E_FUNC}{B_INST} {B_SYS}{system_prompt.strip()}{E_SYS} {user_prompt.strip()} {E_INST}\n\n"
        else:
            prompt = f"{B_FUNC}{function_list}{E_FUNC}{B_INST} {user_prompt.strip()} {E_INST}\n\n"
        return prompt

    def output_to_chat_completion_response(self, raw_llm_output):
        """Turn the raw llama.cpp JSON response into a ChatCompletion-style dict.

        Returns {'message': {'role', 'content', 'function_call'}} where
        'function_call' is the first {"function": ..., "arguments": {...}}
        JSON object found in the model text, or None when absent or
        unparsable.
        """
        # call_llama returns None on transport/HTTP failure; surface that as
        # an empty assistant message instead of crashing on the subscript.
        # (The original also had an orphan `except` with no matching `try`,
        # which is a SyntaxError — the parse is now properly guarded.)
        if raw_llm_output is None:
            return {'message': {'role': 'assistant', 'content': '', 'function_call': None}}
        content = raw_llm_output['content']
        function_call = None
        # Look for a JSON object in 'content' that contains the function call request.
        function_call_match = re.search(r'\{\s*"function":\s*".+?",\s*"arguments":\s*\{.+?\}\s*\}', content)
        if function_call_match:
            try:
                function_call = json.loads(function_call_match.group(0))
            except json.JSONDecodeError:
                # Looked like a call but wasn't valid JSON — report and move on.
                logger.error(f"An error occurred while extracting the result: invalid JSON in {function_call_match.group(0)}")
                function_call = None
        return {
            'message': {
                'role': 'assistant',
                'content': content,
                'function_call': function_call,
            }
        }

    def call_llama(self, model, prompt, timeout=None):
        """POST the prompt to {api_url}/completion and return the decoded
        JSON body, or None on HTTP or JSON-decoding failure.

        `timeout` (seconds) is forwarded to requests; the default None
        preserves the original unbounded-wait behavior.
        """
        headers = {
            'Content-Type': 'application/json',
        }
        data = json.dumps({'model': model, 'prompt': prompt, 'stream': False})
        response = requests.post(f'{self.api_url}/completion', headers=headers,
                                 data=data, timeout=timeout)
        try:
            if response.status_code == 200:
                return response.json()
            logger.error(f'API call failed with status code: {response.status_code}, Response: {response.text}')
        except json.JSONDecodeError:
            logger.error(f'Failed to decode JSON: {response.text}')
        # Explicit None on any failure path so callers can test for it.
        return None

    def chat_completion(self, messages, functions):
        """End-to-end: format the prompt, call the server, parse the response."""
        model = 'mistral'  # TODO: Replace with the actual model name for Mistral-7B
        formatted_request = self.chat_completion_to_prompt(messages, functions)
        logger.debug(f'Formatted request: {formatted_request}')  # inspect before it reaches the model
        raw_llm_output = self.call_llama(model, formatted_request)
        logger.debug('Raw output received')
        logger.debug(f'Raw output: {raw_llm_output}')
        return self.output_to_chat_completion_response(raw_llm_output)
#---------------------------------------------------------------
# Example program to test Wrapper class
api_url = 'http://172.26.165.179:8080'  # Replace with the actual Ollama API base URL
ollama_wrapper = OllamaMistral7BWrapper(api_url)

# Single function schema the model may choose to call.
functions = [
    {
        "function": "foo",
        "description": "Calculates the foo of two numbers",
        "arguments": [
            {
                "name": "number1",
                "type": "number",
                "description": "First number to calculate the foo of",
            },
            {
                "name": "number2",
                "type": "number",
                "description": "Second number to calculate the foo of",
            },
        ],
    },
]

# A minimal system + user exchange that should trigger the function call.
messages = [
    {"role": "system", "content": 'You are a helpful assistant'},
    {"role": "user", "content": 'Please calculate the foo of 4 and 5 and return the result'},
]

response = ollama_wrapper.chat_completion(messages, functions)
logger.debug(f'Final chat completion response: {response}')
And here's the log output from the script:
2023-10-29 07:50:40,348 - __main__ - DEBUG - Formatted request: <FUNCTIONS>{"function": "foo", "description": "Calculates the foo of two numbers", "arguments": [{"name": "number1", "type": "number", "description": "First number to calculate the foo of"}, {"name": "number2", "type": "number", "description": "Second number to calculate the foo of"}]}</FUNCTIONS>
[INST] <<SYS>>
You are a helpful assistant
<</SYS>>
Please calculate the foo of 4 and 5 and return the result [/INST]
2023-10-29 07:50:52,208 - __main__ - DEBUG - Raw output received
2023-10-29 07:50:52,208 - __main__ - DEBUG - Raw output: {'content': '{% set num1 = 4 %}\n{% set num2 = 5 %}\n{% set result = foo(num1, num2) %}\nThe foo of {{ num1 }} and {{ num2 }} is {{ result }}.', 'generation_settings': {'frequency_penalty': 0.0, 'grammar': '', 'ignore_eos': False, 'logit_bias': [], 'mirostat': 0, 'mirostat_eta': 0.10000000149011612, 'mirostat_tau': 5.0, 'model': '/home/jowens/models/7B/Mistral-7B-Instruct-v0.1-function-calling-v2/Mistral-7B-Instruct-v0.1-function-calling-v2.Q4_K.gguf', 'n_ctx': 8000, 'n_keep': 0, 'n_predict': -1, 'n_probs': 0, 'penalize_nl': True, 'presence_penalty': 0.0, 'repeat_last_n': 64, 'repeat_penalty': 1.100000023841858, 'seed': 4294967295, 'stop': [], 'stream': False, 'temp': 0.800000011920929, 'tfs_z': 1.0, 'top_k': 40, 'top_p': 0.949999988079071, 'typical_p': 1.0}, 'model': '/home/jowens/models/7B/Mistral-7B-Instruct-v0.1-function-calling-v2/Mistral-7B-Instruct-v0.1-function-calling-v2.Q4_K.gguf', 'prompt': '<FUNCTIONS>{"function": "foo", "description": "Calculates the foo of two numbers", "arguments": [{"name": "number1", "type": "number", "description": "First number to calculate the foo of"}, {"name": "number2", "type": "number", "description": "Second number to calculate the foo of"}]}</FUNCTIONS>\n\n[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n Please calculate the foo of 4 and 5 and return the result [/INST]\n\n', 'slot_id': 0, 'stop': True, 'stopped_eos': True, 'stopped_limit': False, 'stopped_word': False, 'stopping_word': '', 'timings': {'predicted_ms': 6395.44, 'predicted_n': 55, 'predicted_per_second': 8.59987741265652, 'predicted_per_token_ms': 116.28072727272726, 'prompt_ms': 5447.577, 'prompt_n': 124, 'prompt_per_second': 22.76241345464231, 'prompt_per_token_ms': 43.93207258064516}, 'tokens_cached': 179, 'tokens_evaluated': 124, 'tokens_predicted': 55, 'truncated': False}
2023-10-29 07:50:52,210 - __main__ - DEBUG - Final chat completion response: {'message': {'role': 'assistant', 'content': '{% set num1 = 4 %}\n{% set num2 = 5 %}\n{% set result = foo(num1, num2) %}\nThe foo of {{ num1 }} and {{ num2 }} is {{ result }}.', 'function_call': None}}
I'm assuming that I'm probably formatting something wrong unless the issue is with how I'm providing the information to the API server (using llama.cpp). Any help greatly appreciated!
Howdy!
I haven't built a wrapper myself for a server, and I haven't done much with llama cpp on function calling other than this video, and I know that was working correctly at the time.
After installing llama cpp, have you tried just running the server and sending some requests from the command line (perhaps with the help of a short sh script because inputting functions via command line can be tricky)?
I'd probably start with that and confirm that what you send to the server is formatted as llama cpp expects and also gives you back what you expect.
For initial debugging, it's probably simpler to also leave out the system message as well.
Other small things:
- you have 'model = 'mistral'' and I'm not sure that's calling the right model
- worth doing some logging in call_llama to ensure that what actually goes to the server is what you think it is.
@RonanMcGovern Thank you for providing the feedback! After a bit more playing around, I was finally able to get it to work! For anyone interested, here's a simple test script I created based on the function calling example to show how it works:
import requests
import json

# Define the roles and markers
B_INST, E_INST = "[INST]", "[/INST]"
B_FUNC, E_FUNC = "<FUNCTIONS>", "</FUNCTIONS>\n\n"

# Define the function metadata
function_metadata = {
    "function": "search_bing",
    "description": "Search the web for content on Bing. This allows users to search online/the internet/the web for content.",
    "arguments": [
        {
            "name": "query",
            "type": "string",
            "description": "The search query string"
        }
    ]
}

# Define the user prompt
user_prompt = 'Search for the latest news on AI.'

# Format the function list and prompt
function_list = json.dumps(function_metadata, indent=4)
prompt = f"{B_FUNC}{function_list.strip()}{E_FUNC}{B_INST} {user_prompt.strip()} {E_INST}\n\n"

# Define the API endpoint.
# FIX: the original read "http:/localhost:8080/completion" (single slash),
# which requests rejects as a malformed URL scheme.
url = "http://localhost:8080/completion"

# Send the POST request to the API server
response = requests.post(url, json={"prompt": prompt})

# Print the response
print(response.json())
Thanks! I'll add this to the repo for reference for people.