FunctionGemma-270m-ONNX-CPU
This is an FP32 ONNX model converted from FunctionGemma-270m for x86 CPUs. You can deploy it on your CPU devices.
Note: This is an unofficial version, intended only for testing and development.
Installation
pip install onnxruntime-genai
Running
import onnxruntime_genai as og
import argparse
import os
import json
import time

# Path to your local FunctionGemma-270m-ONNX-CPU model folder.
# NOTE(review): the original snippet used `{Your ... Path}`, which is a
# set-literal / syntax error in Python — the placeholder must be a string.
model_folder = "path/to/FunctionGemma-270m-ONNX-CPU"

# Load the ONNX model and create a tokenizer plus a streaming decoder
# (used later to print generated tokens incrementally).
config = og.Config(model_folder)
model = og.Model(config)
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()
def get_current_weather(location: str, unit: str = "celsius"):
    """Return the current temperature at *location*.

    Args:
        location: Location to query the temperature for.
        unit: Unit for the returned temperature.
            (choices: ["celsius", "fahrenheit"])
    """
    # Stub implementation for the demo: always reports a fixed value.
    return 22.0
# `json` is already imported at the top of the script, so the duplicate
# `import json` that appeared here was removed.

# Chat history: a developer turn carrying the function declaration in the
# model's expected <start_function_declaration> format, plus the user query.
messages_list = [
    {"role": "developer", "content": "You are a model that can do function calling with the following functions<start_function_declaration>declaration:get_current_weather{description:<escape>Gets the current weather in a given location.<escape>,parameters:{properties:{location:{description:<escape>The city and state, e.g. \"San Francisco, CA\" or \"Tokyo, JP\"<escape>,type:<escape>STRING<escape>},unit:{description:<escape>The unit to return the temperature in.<escape>,enum:[<escape>celsius<escape>,<escape>fahrenheit<escape>],type:<escape>STRING<escape>}},required:[<escape>location<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration>"},
    {"role": "user", "content": "Hey, what's the weather in Tokyo right now?"},
]

# Render the chat through the model's template; add_generation_prompt=True
# appends the assistant-turn marker so the model starts responding.
messages = json.dumps(messages_list)
prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=True)
print(prompt)

# Set up the generator and feed it the encoded prompt tokens.
params = og.GeneratorParams(model)
generator = og.Generator(model, params)
input_tokens = tokenizer.encode(prompt)
generator.append_tokens(input_tokens)

# Stream tokens one at a time until the generator reports completion.
while not generator.is_done():
    generator.generate_next_token()
    new_token = generator.get_next_tokens()[0]
    print(tokenizer_stream.decode(new_token), end='', flush=True)
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support