// LiteRT-LM / c /engine.h
// SeaWolf-AI's picture
// Upload full LiteRT-LM codebase
// 5f923cd verified
// Copyright 2025 The ODML Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_
#define THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Export decoration applied to every public function in this header.
//
// A Windows .dll only exports functions marked __declspec( dllexport ):
// https://learn.microsoft.com/en-us/cpp/cpp/using-dllimport-and-dllexport-in-cpp-classes?view=msvc-170
//
// _WIN32 is predefined as 1 when the compilation target is 32-bit ARM,
// 64-bit ARM, x86, x64, or ARM64EC, and is undefined otherwise:
// https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros
//
// On every other target the macro expands to nothing.
#ifdef _WIN32
#define LITERT_LM_C_API_EXPORT __declspec(dllexport)
#else
#define LITERT_LM_C_API_EXPORT
#endif
// Opaque handle types. Only forward declarations are given here, so these
// types can be used through pointers only.

// LiteRT LM Engine handle.
typedef struct LiteRtLmEngine LiteRtLmEngine;
// LiteRT LM Session handle.
typedef struct LiteRtLmSession LiteRtLmSession;
// LiteRT LM Responses handle.
typedef struct LiteRtLmResponses LiteRtLmResponses;
// LiteRT LM Engine Settings handle.
typedef struct LiteRtLmEngineSettings LiteRtLmEngineSettings;
// LiteRT LM Benchmark Info handle.
typedef struct LiteRtLmBenchmarkInfo LiteRtLmBenchmarkInfo;
// LiteRT LM Conversation handle.
typedef struct LiteRtLmConversation LiteRtLmConversation;
// JSON response handle.
typedef struct LiteRtLmJsonResponse LiteRtLmJsonResponse;
// LiteRT LM Session Config handle.
typedef struct LiteRtLmSessionConfig LiteRtLmSessionConfig;
// LiteRT LM Conversation Config handle.
typedef struct LiteRtLmConversationConfig LiteRtLmConversationConfig;
// Selects which sampler is used.
typedef enum {
  kTypeUnspecified = 0,
  // Random pick among the k top-ranked tokens.
  kTopK = 1,
  // Top-k sampling is performed first; then a random pick is made among the
  // tokens whose sum is greater than or equal to p.
  kTopP = 2,
  // Deterministic pick of the token with the maximum logit (argmax).
  kGreedy = 3,
} Type;
// Bundle of sampler parameters, passed to
// `litert_lm_session_config_set_sampler_params`.
//
// NOTE(review): the individual fields are not documented in this header.
// Their names suggest the k of top-k, the p of top-p, a sampling temperature
// and a random seed -- confirm against the implementation.
typedef struct {
  // Which sampler to use.
  Type type;
  int32_t top_k;
  float top_p;
  float temperature;
  int32_t seed;
} LiteRtLmSamplerParams;
// Creates a LiteRT LM Session Config.
// The caller owns the returned config and must destroy it with
// `litert_lm_session_config_delete`.
//
// The parameter list is spelled `(void)` so that C compilers see a real
// prototype; before C23 an empty `()` leaves the parameters unspecified and
// lets calls with stray arguments compile unchecked.
//
// @return A pointer to the created config, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmSessionConfig* litert_lm_session_config_create(void);
// Configures the upper bound on output tokens per decode step for this
// session.
//
// @param config The config to modify.
// @param max_output_tokens The maximum number of output tokens.
LITERT_LM_C_API_EXPORT
void litert_lm_session_config_set_max_output_tokens(
    LiteRtLmSessionConfig* config,
    int max_output_tokens);
// Stores the given sampler parameters in this session config.
//
// @param config The config to modify.
// @param sampler_params The sampler parameters to use.
LITERT_LM_C_API_EXPORT
void litert_lm_session_config_set_sampler_params(
    LiteRtLmSessionConfig* config,
    const LiteRtLmSamplerParams* sampler_params);
// Releases a LiteRT LM Session Config.
//
// @param config The config to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_session_config_delete(
    LiteRtLmSessionConfig* config);
// Builds a LiteRT LM Conversation Config.
// The caller owns the returned config and must destroy it with
// `litert_lm_conversation_config_delete`.
//
// @param engine The engine to use.
// @param session_config The session config to use. If NULL, default
// session config will be used.
// @param system_message_json The system message in JSON format.
// @param tools_json The tools description in JSON array format.
// @param messages_json NOTE(review): not documented in this header;
// presumably messages in JSON format -- confirm against the implementation.
// @param enable_constrained_decoding Whether to enable constrained decoding.
// @return A pointer to the created config, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmConversationConfig* litert_lm_conversation_config_create(
    LiteRtLmEngine* engine,
    const LiteRtLmSessionConfig* session_config,
    const char* system_message_json,
    const char* tools_json,
    const char* messages_json,
    bool enable_constrained_decoding);
// Releases a LiteRT LM Conversation Config.
//
// @param config The config to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_conversation_config_delete(
    LiteRtLmConversationConfig* config);
// Chooses the minimum log level for the LiteRT LM library.
//
// @param level One of: 0=INFO, 1=WARNING, 2=ERROR, 3=FATAL.
LITERT_LM_C_API_EXPORT
void litert_lm_set_min_log_level(
    int level);
// Tags the kind of payload carried by an `InputData`.
// The values are written out explicitly; they match the implicit numbering
// of the enumerators in declaration order.
typedef enum {
  kInputText = 0,
  kInputImage = 1,
  kInputImageEnd = 2,
  kInputAudio = 3,
  kInputAudioEnd = 4,
} InputDataType;
// One piece of input data.
typedef struct {
  // Selects how `data` is interpreted.
  InputDataType type;
  // Payload pointer, interpreted according to `type`:
  //   kInputText                -> a UTF-8 string.
  //   kInputImage / kInputAudio -> a pointer to the raw bytes.
  const void* data;
  // Size of the data in bytes.
  size_t size;
} InputData;
// Builds LiteRT LM Engine Settings. The caller owns the returned settings and
// must destroy them with `litert_lm_engine_settings_delete`.
//
// @param model_path The path to the model file.
// @param backend_str The backend to use (e.g., "cpu", "gpu").
// @param vision_backend_str The vision backend to use, or NULL if not set.
// @param audio_backend_str The audio backend to use, or NULL if not set.
// @return A pointer to the created settings, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmEngineSettings* litert_lm_engine_settings_create(
    const char* model_path,
    const char* backend_str,
    const char* vision_backend_str,
    const char* audio_backend_str);
// Releases LiteRT LM Engine Settings.
//
// @param settings The settings to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_delete(
    LiteRtLmEngineSettings* settings);
// Configures the engine's maximum number of tokens.
//
// @param settings The engine settings.
// @param max_num_tokens The maximum number of tokens.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_max_num_tokens(
    LiteRtLmEngineSettings* settings,
    int max_num_tokens);
// Configures the engine's cache directory.
//
// @param settings The engine settings.
// @param cache_dir The cache directory.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_cache_dir(
    LiteRtLmEngineSettings* settings,
    const char* cache_dir);
// Configures the activation data type.
//
// @param settings The engine settings.
// @param activation_data_type_int The activation data type, as an integer.
// The possible values are those of `ActivationDataType` in
// executor_settings_base.h (e.g., 0 for F32, 1 for F16, 2 for I16, 3 for I8).
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_activation_data_type(
    LiteRtLmEngineSettings* settings,
    int activation_data_type_int);
// Configures the engine's prefill chunk size. This applies only to the CPU
// backend with dynamic models.
//
// @param settings The engine settings.
// @param prefill_chunk_size The prefill chunk size.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_prefill_chunk_size(
    LiteRtLmEngineSettings* settings,
    int prefill_chunk_size);
// Turns on benchmarking for the engine.
//
// @param settings The engine settings.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_enable_benchmark(LiteRtLmEngineSettings* settings);
// Configures how many prefill tokens are used for benchmarking.
//
// @param settings The engine settings.
// @param num_prefill_tokens The number of prefill tokens.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_num_prefill_tokens(
    LiteRtLmEngineSettings* settings,
    int num_prefill_tokens);
// Configures how many decode tokens are used for benchmarking.
//
// @param settings The engine settings.
// @param num_decode_tokens The number of decode tokens.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_num_decode_tokens(
    LiteRtLmEngineSettings* settings,
    int num_decode_tokens);
// Builds a LiteRT LM Engine from the given settings. The caller owns the
// returned engine and must destroy it with `litert_lm_engine_delete`.
//
// @param settings The engine settings.
// @return A pointer to the created engine, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmEngine* litert_lm_engine_create(
    const LiteRtLmEngineSettings* settings);
// Releases a LiteRT LM Engine.
//
// @param engine The engine to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_delete(
    LiteRtLmEngine* engine);
// Opens a LiteRT LM Session on an engine. The caller owns the returned
// session and must destroy it with `litert_lm_session_delete`.
//
// @param engine The engine to create the session from.
// @param config The session config of the session. If NULL, use the default
// session config.
// @return A pointer to the created session, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmSession* litert_lm_engine_create_session(
    LiteRtLmEngine* engine,
    LiteRtLmSessionConfig* config);
// Releases a LiteRT LM Session.
//
// @param session The session to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_session_delete(
    LiteRtLmSession* session);
// Produces content for the given input prompt.
//
// @param session The session to use for generation.
// @param inputs An array of InputData structs representing the multimodal
// input.
// @param num_inputs The number of InputData structs in the array.
// @return A pointer to the responses, or NULL on failure. The caller owns the
// responses and must delete them with `litert_lm_responses_delete`.
LITERT_LM_C_API_EXPORT
LiteRtLmResponses* litert_lm_session_generate_content(
    LiteRtLmSession* session,
    const InputData* inputs,
    size_t num_inputs);
// Releases a LiteRT LM Responses object.
//
// @param responses The responses to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_responses_delete(
    LiteRtLmResponses* responses);
// Reports how many response candidates are available.
//
// @param responses The responses object.
// @return The number of candidates.
LITERT_LM_C_API_EXPORT
int litert_lm_responses_get_num_candidates(
    const LiteRtLmResponses* responses);
// Looks up the response text at a given index.
//
// @param responses The responses object.
// @param index The index of the response.
// @return The response text, or NULL if index is out of bounds. The string is
// owned by the `responses` object and is valid only for its lifetime.
LITERT_LM_C_API_EXPORT
const char* litert_lm_responses_get_response_text_at(
    const LiteRtLmResponses* responses,
    int index);
// Fetches the benchmark information of a session. The caller owns the
// returned benchmark info and must destroy it with
// `litert_lm_benchmark_info_delete`.
//
// @param session The session to get the benchmark info from.
// @return A pointer to the benchmark info, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmBenchmarkInfo* litert_lm_session_get_benchmark_info(LiteRtLmSession* session);
// Releases a LiteRT LM Benchmark Info object.
//
// @param benchmark_info The benchmark info to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_benchmark_info_delete(
    LiteRtLmBenchmarkInfo* benchmark_info);
// Reports the time to the first token, in seconds.
//
// The time to first token excludes initialization time. It is the prefill
// time of the first turn plus the time spent decoding the first token.
//
// @param benchmark_info The benchmark info object.
// @return The time to the first token in seconds.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_time_to_first_token(const LiteRtLmBenchmarkInfo* benchmark_info);
// Reports the total initialization time, in seconds.
//
// @param benchmark_info The benchmark info object.
// @return The total initialization time in seconds.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_total_init_time_in_second(const LiteRtLmBenchmarkInfo* benchmark_info);
// Reports how many prefill turns were recorded.
//
// @param benchmark_info The benchmark info object.
// @return The number of prefill turns.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_num_prefill_turns(const LiteRtLmBenchmarkInfo* benchmark_info);
// Reports how many decode turns were recorded.
//
// @param benchmark_info The benchmark info object.
// @return The number of decode turns.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_num_decode_turns(const LiteRtLmBenchmarkInfo* benchmark_info);
// Looks up the prefill token count of one prefill turn.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the prefill turn.
// @return The prefill token count.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_prefill_token_count_at(
    const LiteRtLmBenchmarkInfo* benchmark_info,
    int index);
// Looks up the decode token count of one decode turn.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the decode turn.
// @return The decode token count.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_decode_token_count_at(
    const LiteRtLmBenchmarkInfo* benchmark_info,
    int index);
// Looks up the prefill tokens per second of one prefill turn.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the prefill turn.
// @return The prefill tokens per second.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_prefill_tokens_per_sec_at(
    const LiteRtLmBenchmarkInfo* benchmark_info,
    int index);
// Looks up the decode tokens per second of one decode turn.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the decode turn.
// @return The decode tokens per second.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_decode_tokens_per_sec_at(
    const LiteRtLmBenchmarkInfo* benchmark_info,
    int index);
// Signature of the callback that receives streamed responses.
//
// @param callback_data Pointer to user-defined data passed to the stream
// function.
// @param chunk The piece of text from the stream. It is only valid for the
// duration of the call.
// @param is_final True if this is the last chunk in the stream.
// @param error_msg A null-terminated string with an error message, or NULL
// on success.
typedef void (*LiteRtLmStreamCallback)(
    void* callback_data,
    const char* chunk,
    bool is_final,
    const char* error_msg);
// Produces content for the given input prompt and delivers the response
// through a callback. The call is non-blocking: the callback is invoked from
// a background thread for each chunk.
//
// @param session The session to use for generation.
// @param inputs An array of InputData structs representing the multimodal
// input.
// @param num_inputs The number of InputData structs in the array.
// @param callback The callback function to receive response chunks.
// @param callback_data A pointer to user data that will be passed to the
// callback.
// @return 0 on success, non-zero on failure to start the stream.
LITERT_LM_C_API_EXPORT
int litert_lm_session_generate_content_stream(
    LiteRtLmSession* session,
    const InputData* inputs,
    size_t num_inputs,
    LiteRtLmStreamCallback callback,
    void* callback_data);
// Opens a LiteRT LM Conversation on an engine. The caller owns the returned
// conversation and must destroy it with `litert_lm_conversation_delete`.
//
// @param engine The engine to create the conversation from.
// @param config The conversation config to use. If NULL, the default config
// will be used.
// @return A pointer to the created conversation, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmConversation* litert_lm_conversation_create(
    LiteRtLmEngine* engine,
    LiteRtLmConversationConfig* config);
// Releases a LiteRT LM Conversation.
//
// @param conversation The conversation to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_conversation_delete(
    LiteRtLmConversation* conversation);
// Delivers a message to the conversation and hands back the response.
// The call blocks until the response is available.
//
// @param conversation The conversation to use.
// @param message_json A JSON string representing the message to send.
// @param extra_context A JSON string representing the extra context to use.
// @return A pointer to the JSON response, or NULL on failure. The caller owns
// the response and must delete it with `litert_lm_json_response_delete`.
LITERT_LM_C_API_EXPORT
LiteRtLmJsonResponse* litert_lm_conversation_send_message(
    LiteRtLmConversation* conversation,
    const char* message_json,
    const char* extra_context);
// Releases a LiteRT LM Json Response object.
//
// @param response The response to destroy.
LITERT_LM_C_API_EXPORT
void litert_lm_json_response_delete(
    LiteRtLmJsonResponse* response);
// Gives access to the JSON string held by a response object.
//
// @param response The response object.
// @return The response JSON string, or NULL if response is NULL. The string
// is owned by the `response` object and is valid only for its lifetime.
LITERT_LM_C_API_EXPORT
const char* litert_lm_json_response_get_string(const LiteRtLmJsonResponse* response);
// Delivers a message to the conversation and passes the response to a
// callback as it streams. The call is non-blocking: the callback is invoked
// from a background thread for each chunk.
//
// @param conversation The conversation to use.
// @param message_json A JSON string representing the message to send.
// @param extra_context A JSON string representing the extra context to use.
// @param callback The callback function to receive response chunks.
// @param callback_data A pointer to user data that will be passed to the
// callback.
// @return 0 on success, non-zero on failure to start the stream.
LITERT_LM_C_API_EXPORT
int litert_lm_conversation_send_message_stream(
    LiteRtLmConversation* conversation,
    const char* message_json,
    const char* extra_context,
    LiteRtLmStreamCallback callback,
    void* callback_data);
// Cancels the inference process that is currently running. Intended for
// asynchronous inference.
//
// @param conversation The conversation to cancel the inference for.
LITERT_LM_C_API_EXPORT
void litert_lm_conversation_cancel_process(
    LiteRtLmConversation* conversation);
// Fetches the benchmark information of a conversation. The caller owns the
// returned benchmark info and must destroy it with
// `litert_lm_benchmark_info_delete`.
//
// @param conversation The conversation to get the benchmark info from.
// @return A pointer to the benchmark info, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmBenchmarkInfo* litert_lm_conversation_get_benchmark_info(LiteRtLmConversation* conversation);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_