FunctionGemma-270m-ONNX-CPU

This is a quantized ONNX model based on FunctionGemma-270m, built for x86 CPUs. You can deploy it on your CPU devices.

Note: This is an unofficial version, intended only for testing and development.

Installation


pip install onnxruntime-genai

Running


import onnxruntime_genai as og
import argparse
import os
import json
import time

# Path to your local FunctionGemma-270m-ONNX-CPU model directory.
# NOTE: replace with the actual folder you downloaded the model into.
model_folder = "path/to/FunctionGemma-270m-ONNX-CPU"

# Load the ONNX Runtime GenAI config and model from that folder.
config = og.Config(model_folder)
model = og.Model(config)

# Tokenizer plus a streaming decoder for printing tokens incrementally.
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

def get_current_weather(location: str, unit: str = "celsius") -> float:
    """
    Get the current temperature at a location.

    Args:
        location: The location to get the temperature for.
        unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"])

    Returns:
        The temperature as a float, expressed in the requested unit.
    """
    # Stubbed reading for demo purposes; a real implementation would
    # query a weather service for `location`.
    temp_celsius = 22.0
    # Honor the `unit` parameter (it was previously ignored); the
    # default "celsius" path still returns 22.0, so callers are unaffected.
    if unit == "fahrenheit":
        return temp_celsius * 9 / 5 + 32
    return temp_celsius

# Chat transcript for the template: the developer turn embeds the
# function declaration in FunctionGemma's function-calling format,
# and the user turn asks the question the model should answer by
# emitting a function call. (The redundant second `import json` was
# removed — json is already imported at the top of the file.)
messages_list = [
    {"role": "developer", "content": "You are a model that can do function calling with the following functions<start_function_declaration>declaration:get_current_weather{description:<escape>Gets the current weather in a given location.<escape>,parameters:{properties:{location:{description:<escape>The city and state, e.g. \"San Francisco, CA\" or \"Tokyo, JP\"<escape>,type:<escape>STRING<escape>},unit:{description:<escape>The unit to return the temperature in.<escape>,enum:[<escape>celsius<escape>,<escape>fahrenheit<escape>],type:<escape>STRING<escape>}},required:[<escape>location<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration>"},
    {"role": "user", "content": "Hey, what's the weather in Tokyo right now?"},
]
# apply_chat_template expects the messages as a JSON-encoded string.
messages = json.dumps(messages_list)

prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=True)
print(prompt)

# Build a generator with default generation parameters.
params = og.GeneratorParams(model)
generator = og.Generator(model, params)

# Feed the encoded prompt into the generator as the initial context.
generator.append_tokens(tokenizer.encode(prompt))

# Stream the completion: decode and print each token as it is produced.
while not generator.is_done():
    generator.generate_next_token()
    token = generator.get_next_tokens()[0]
    print(tokenizer_stream.decode(token), end='', flush=True)

Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support