argilla
/

Llama-3.2-1B-Instruct-APIGen-FC-v0.1

@@ -21,19 +21,16 @@ It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
-Example query with prompt:
 ````python
 import json
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from jinja2 import Template
-model_name = "plaguss/Llama-3.2-1B-Instruct-APIGen-FC-v0.1"
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto", trust_remote_code=True)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
 SYSTEM_PROMPT = """
 You are an expert in composing functions. You are given a question and a set of possible functions.
 Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
@@ -58,6 +55,63 @@ Please answer the following query:
 {{ query }}
 """.lstrip())
 get_weather_api = {
     "name": "get_weather",
     "description": "Get the current weather for a location",
@@ -93,45 +147,147 @@ search_api = {
     }
 }
-tools = [get_weather_api, search_api]
 query = "What's the weather like in New York in fahrenheit?"
-user_prompt = prompt.render(tools=json.dumps(tools), query=query)
-messages=[
-    {"role": "system", "content": SYSTEM_PROMPT},
-    { 'role': 'user', 'content': user_prompt}
-]
 inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
 result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)
-pattern = r'<tool_call>(.*?)</tool_call>'
-import re
-matches = re.findall(pattern, result, re.DOTALL)
-response = json.loads(matches[0])
 # [{'name': 'get_weather', 'arguments': {'location': 'New York', 'unit': 'fahrenheit'}}]
 ````
 Example response with no tools available
 ```python
-tools = "[]"
 query = "What's the weather like in New York in fahrenheit?"
-user_prompt = prompt.render(tools=json.dumps(tools), query=query)
-messages=[
-    {"role": "system", "content": SYSTEM_PROMPT},
-    { 'role': 'user', 'content': user_prompt}
-]
 inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
 result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
 # 'The query cannot be answered, no tools were provided.'
 ```
@@ -151,23 +307,21 @@ cut_number = {
   }
 }
-tools = [cut_number]
 query = "What's the weather like in New York in fahrenheit?"
-user_prompt = prompt.render(tools=json.dumps(tools), query=query)
-messages=[
-    {"role": "system", "content": SYSTEM_PROMPT},
-    { 'role': 'user', 'content': user_prompt}
-]
 inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
 result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
 # "The query cannot be answered with the provided tools. The query lacks the parameters required by the function. Please provide the parameters, and I'll be happy to assist."
 ```
 ## Training procedure
 [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/plaguss/huggingface/runs/dw9q43g4)

 ## Quick start
+The examples below show different ways of using the model:
+<details><summary> Click to see `prepare_messages` function </summary>
 ````python
+from typing import Optional
 import json
 from jinja2 import Template
 SYSTEM_PROMPT = """
 You are an expert in composing functions. You are given a question and a set of possible functions.
 Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
 {{ query }}
 """.lstrip())
def prepare_messages(
    query: str,
    tools: Optional[list[dict]] = None,
    conversation_history: Optional[list[dict[str, str]]] = None,
) -> list[dict[str, str]]:
    """Prepare the chat messages for the given query and tools.

    Args:
        query: The query to be answered.
        tools: The tools available to the model, as a list of function
            definitions. Defaults to None, in which case an empty list is
            rendered into the prompt.
        conversation_history: Messages exchanged so far (system prompt, first
            rendered user prompt, assistant replies...). Defaults to None,
            meaning `query` is the first message of a conversation.

    Returns:
        The list of messages to pass to the chat template. For a first turn
        this is the system prompt plus the user prompt rendered with `tools`.
        For a follow-up turn it is a copy of `conversation_history` with
        `query` appended as a new user message.
    """
    if conversation_history:
        # Follow-up turn: the tool definitions are expected to already be part
        # of the first user message in the history, so `tools` is not rendered
        # again. A new list is built so the caller's history is left untouched.
        return [*conversation_history, {"role": "user", "content": query}]

    if tools is None:
        tools = []
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt.render(tools=json.dumps(tools), query=query)},
    ]
+````
+</details>
+<details><summary> Click to see `parse_response` function </summary>
+```python
+import re
+import json
+def parse_response(text: str) -> str | dict[str, any]:
+    """Parses a response from the model, returning either the
+    parsed list with the tool calls parsed, or the
+    model thought or response if couldn't generate one.
+    Args:
+        text: Response from the model.
+    """
+    pattern = r"<tool_call>(.*?)</tool_call>"
+    matches = re.findall(pattern, text, re.DOTALL)
+    if matches:
+        return json.loads(matches[0])
+    return text
+```
+</details>
+Example of *simple* function call:
+````python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model_name = "plaguss/Llama-3.2-1B-Instruct-APIGen-FC-v0.1"
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 get_weather_api = {
     "name": "get_weather",
     "description": "Get the current weather for a location",
     }
 }
+available_tools = [get_weather_api, search_api]
 query = "What's the weather like in New York in fahrenheit?"
+messages = prepare_messages(query, tools=available_tools)
 inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
 result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)
+response = parse_response(result)
 # [{'name': 'get_weather', 'arguments': {'location': 'New York', 'unit': 'fahrenheit'}}]
 ````
+<details><summary> Click to see an example of *parallel* function call: </summary>
+```python
+available_tools = [{"name": "spotify.play", "description": "Play specific tracks from a given artist for a specific time duration.", "parameters": {"type": "dict", "properties": {"artist": {"type": "string", "description": "The artist whose songs you want to play."}, "duration": {"type": "integer", "description": "The duration for which the songs should be played, in minutes."}}, "required": ["artist", "duration"]}}]
+query = "Play songs from the artists Taylor Swift and Maroon 5, with a play time of 20 minutes and 15 minutes respectively, on Spotify."
+messages = prepare_messages(query, tools=available_tools)
+inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
+result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)
+response = parse_response(result)
+# [{'name': 'spotify.play', 'arguments': {'artist': 'Taylor Swift', 'duration': 20}}, {'name': 'spotify.play', 'arguments': {'artist': 'Maroon 5', 'duration': 15}}]
+```
+</details>
+<details><summary> Click to see an example of *multiple* function call: </summary>
+```python
+available_tools = [{"name": "country_info.largest_city", "description": "Fetch the largest city of a specified country.", "parameters": {"type": "dict", "properties": {"country": {"type": "string", "description": "Name of the country."}}, "required": ["country"]}}, {"name": "country_info.capital", "description": "Fetch the capital city of a specified country.", "parameters": {"type": "dict", "properties": {"country": {"type": "string", "description": "Name of the country."}}, "required": ["country"]}}, {"name": "country_info.population", "description": "Fetch the current population of a specified country.", "parameters": {"type": "dict", "properties": {"country": {"type": "string", "description": "Name of the country."}}, "required": ["country"]}}]
+query = "What is the capital of Brazil?"
+messages = prepare_messages(query, tools=available_tools)
+inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
+result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)
+response = parse_response(result)
+# [{'name': 'country_info.capital', 'arguments': {'country': 'Brazil'}}]
+```
+</details>
+<details><summary> Click to see an example of *parallel multiple* function call: </summary>
+```python
+available_tools = [{"name": "math_toolkit.sum_of_multiples", "description": "Find the sum of all multiples of specified numbers within a specified range.", "parameters": {"type": "dict", "properties": {"lower_limit": {"type": "integer", "description": "The start of the range (inclusive)."}, "upper_limit": {"type": "integer", "description": "The end of the range (inclusive)."}, "multiples": {"type": "array", "items": {"type": "integer"}, "description": "The numbers to find multiples of."}}, "required": ["lower_limit", "upper_limit", "multiples"]}}, {"name": "math_toolkit.product_of_primes", "description": "Find the product of the first n prime numbers.", "parameters": {"type": "dict", "properties": {"count": {"type": "integer", "description": "The number of prime numbers to multiply together."}}, "required": ["count"]}}]
+query = "Find the sum of all the multiples of 3 and 5 between 1 and 1000. Also find the product of the first five prime numbers."
+messages = prepare_messages(query, tools=available_tools)
+inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
+result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)
+response = parse_response(result)
+# [{'name': 'math_toolkit.sum_of_multiples', 'arguments': {'lower_limit': 1, 'upper_limit': 1000, 'multiples': [3, 5]}}, {'name': 'math_toolkit.product_of_primes', 'arguments': {'count': 5}}]
+```
+</details>
+<details><summary> Click to see an example of *multi-turn* function call: </summary>
+```python
+get_weather_api = {
+    "name": "get_weather",
+    "description": "Get the current weather for a location",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, New York"
+            },
+            "unit": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"],
+                "description": "The unit of temperature to return"
+            }
+        },
+        "required": ["location"]
+    }
+}
+available_tools = [get_weather_api]
+query = "What's the weather like in Madrid in celsius?"
+messages = prepare_messages(query, tools=available_tools)
+inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
+result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)
+response = parse_response(result)
+# 2nd turn
+conversation_history = messages.copy()
+conversation_history.append({"role": "assistant", "content": json.dumps(response)})
+new_query = "And in Edinburgh in celsius?"
+new_messages = prepare_messages(new_query, tools=available_tools, conversation_history=conversation_history)
+inputs = tokenizer.apply_chat_template(new_messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
+result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)
+response = parse_response(result)
+# [{'name': 'get_weather', 'arguments': {'location': 'Edinburgh', 'unit': 'celsius'}}]
+```
+</details>
+<details><summary> Click to see examples of *irrelevance* detection (queries that cannot be answered with the available tools): </summary>
 Example response with no tools available
 ```python
+available_tools = []
 query = "What's the weather like in New York in fahrenheit?"
+messages = prepare_messages(query, tools=available_tools)
 inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
 result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
+response = parse_response(result)
 # 'The query cannot be answered, no tools were provided.'
 ```
   }
 }
+available_tools = [cut_number]
 query = "What's the weather like in New York in fahrenheit?"
+messages = prepare_messages(query, tools=available_tools)
 inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
 result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
+response = parse_response(result)
 # "The query cannot be answered with the provided tools. The query lacks the parameters required by the function. Please provide the parameters, and I'll be happy to assist."
 ```
+</details>
 ## Training procedure
 [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/plaguss/huggingface/runs/dw9q43g4)