Spaces:
Running
Running
| // Copyright 2025 The ODML Authors. | |
| // | |
| // Licensed under the Apache License, Version 2.0 (the "License"); | |
| // you may not use this file except in compliance with the License. | |
| // You may obtain a copy of the License at | |
| // | |
| // http://www.apache.org/licenses/LICENSE-2.0 | |
| // | |
| // Unless required by applicable law or agreed to in writing, software | |
| // distributed under the License is distributed on an "AS IS" BASIS, | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| // See the License for the specific language governing permissions and | |
| // limitations under the License. | |
| extern "C" { | |
| // For Windows, __declspec( dllexport ) is required to export function in .dll. | |
| // https://learn.microsoft.com/en-us/cpp/cpp/using-dllimport-and-dllexport-in-cpp-classes?view=msvc-170 | |
| // | |
| // _WIN32 is defined as 1 when the compilation target is 32-bit ARM, 64-bit ARM, | |
| // x86, x64, or ARM64EC. Otherwise, undefined. | |
| // https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros | |
// Opaque handle types. None of these structs is defined in the visible part of
// this header; client code refers to them through pointers only.

// Handle to a LiteRT LM Engine.
typedef struct LiteRtLmEngine LiteRtLmEngine;

// Handle to a LiteRT LM Session.
typedef struct LiteRtLmSession LiteRtLmSession;

// Handle to a set of LiteRT LM Responses.
typedef struct LiteRtLmResponses LiteRtLmResponses;

// Handle to LiteRT LM Engine Settings.
typedef struct LiteRtLmEngineSettings LiteRtLmEngineSettings;

// Handle to LiteRT LM Benchmark Info.
typedef struct LiteRtLmBenchmarkInfo LiteRtLmBenchmarkInfo;

// Handle to a LiteRT LM Conversation.
typedef struct LiteRtLmConversation LiteRtLmConversation;

// Handle to a JSON response.
typedef struct LiteRtLmJsonResponse LiteRtLmJsonResponse;

// Handle to a LiteRT LM Session Config.
typedef struct LiteRtLmSessionConfig LiteRtLmSessionConfig;

// Handle to a LiteRT LM Conversation Config.
typedef struct LiteRtLmConversationConfig LiteRtLmConversationConfig;
// Sampler type selector.
typedef enum {
  // No sampler type has been chosen.
  kTypeUnspecified = 0,

  // Probabilistically pick among the top k tokens.
  kTopK = 1,

  // After first performing top-k sampling, probabilistically pick among the
  // tokens whose sum (presumably of probabilities) is greater than or equal
  // to p.
  kTopP = 2,

  // Pick the token with the maximum logit (i.e., argmax).
  kGreedy = 3,
} Type;
| // Parameters for the sampler. | |
| typedef struct { | |
| Type type; | |
| int32_t top_k; | |
| float top_p; | |
| float temperature; | |
| int32_t seed; | |
| } LiteRtLmSamplerParams; | |
| // Creates a LiteRT LM Session Config. | |
| // The caller is responsible for destroying the config using | |
| // `litert_lm_session_config_delete`. | |
| // @return A pointer to the created config, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmSessionConfig* litert_lm_session_config_create(); | |
| // Sets the maximum number of output tokens per decode step for this session. | |
| // @param config The config to modify. | |
| // @param max_output_tokens The maximum number of output tokens. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_session_config_set_max_output_tokens( | |
| LiteRtLmSessionConfig* config, int max_output_tokens); | |
| // Sets the sampler parameters for this session config. | |
| // @param config The config to modify. | |
| // @param sampler_params The sampler parameters to use. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_session_config_set_sampler_params( | |
| LiteRtLmSessionConfig* config, const LiteRtLmSamplerParams* sampler_params); | |
| // Destroys a LiteRT LM Session Config. | |
| // @param config The config to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_session_config_delete(LiteRtLmSessionConfig* config); | |
| // Creates a LiteRT LM Conversation Config. | |
| // The caller is responsible for destroying the config using | |
| // `litert_lm_conversation_config_delete`. | |
| // @param engine The engine to use. | |
| // @param session_config The session config to use. If NULL, default | |
| // session config will be used. | |
| // @param system_message_json The system message in JSON format. | |
| // @param tools_json The tools description in JSON array format. | |
| // @param enable_constrained_decoding Whether to enable constrained decoding. | |
| // @return A pointer to the created config, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmConversationConfig* litert_lm_conversation_config_create( | |
| LiteRtLmEngine* engine, const LiteRtLmSessionConfig* session_config, | |
| const char* system_message_json, const char* tools_json, | |
| const char* messages_json, bool enable_constrained_decoding); | |
| // Destroys a LiteRT LM Conversation Config. | |
| // @param config The config to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_conversation_config_delete(LiteRtLmConversationConfig* config); | |
| // Sets the minimum log level for the LiteRT LM library. | |
| // Log levels are: 0=INFO, 1=WARNING, 2=ERROR, 3=FATAL. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_set_min_log_level(int level); | |
// Kind of payload carried by an `InputData` entry.
// The numeric values are spelled out; they match the implicit numbering of
// the enumerators in declaration order.
typedef enum {
  kInputText = 0,
  kInputImage = 1,
  kInputImageEnd = 2,
  kInputAudio = 3,
  kInputAudioEnd = 4,
} InputDataType;
| // Represents a single piece of input data. | |
| typedef struct { | |
| InputDataType type; | |
| // The data pointer. The interpretation depends on the `type`. | |
| // For kInputText, it's a UTF-8 string. | |
| // For kInputImage and kInputAudio, it's a pointer to the raw bytes. | |
| const void* data; | |
| // The size of the data in bytes. | |
| size_t size; | |
| } InputData; | |
| // Creates LiteRT LM Engine Settings. The caller is responsible for destroying | |
| // the settings using `litert_lm_engine_settings_delete`. | |
| // | |
| // @param model_path The path to the model file. | |
| // @param backend_str The backend to use (e.g., "cpu", "gpu"). | |
| // @param vision_backend_str The vision backend to use, or NULL if not set. | |
| // @param audio_backend_str The audio backend to use, or NULL if not set. | |
| // @return A pointer to the created settings, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmEngineSettings* litert_lm_engine_settings_create( | |
| const char* model_path, const char* backend_str, | |
| const char* vision_backend_str, const char* audio_backend_str); | |
| // Destroys LiteRT LM Engine Settings. | |
| // | |
| // @param settings The settings to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_delete(LiteRtLmEngineSettings* settings); | |
| // Sets the maximum number of tokens for the engine. | |
| // | |
| // @param settings The engine settings. | |
| // @param max_num_tokens The maximum number of tokens. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_set_max_num_tokens( | |
| LiteRtLmEngineSettings* settings, int max_num_tokens); | |
| // Sets the cache directory for the engine. | |
| // | |
| // @param settings The engine settings. | |
| // @param cache_dir The cache directory. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_set_cache_dir(LiteRtLmEngineSettings* settings, | |
| const char* cache_dir); | |
| // Sets the activation data type. | |
| // | |
| // @param settings The engine settings. | |
| // @param activation_data_type_int The activation data type. See | |
| // `ActivationDataType` in executor_settings_base.h for the possible values | |
| // (e.g., 0 for F32, 1 for F16, 2 for I16, 3 for I8). | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_set_activation_data_type( | |
| LiteRtLmEngineSettings* settings, int activation_data_type_int); | |
| // Sets the prefill chunk size for the engine. Only applicable for CPU backend | |
| // with dynamic models. | |
| // | |
| // @param settings The engine settings. | |
| // @param prefill_chunk_size The prefill chunk size. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_set_prefill_chunk_size( | |
| LiteRtLmEngineSettings* settings, int prefill_chunk_size); | |
| // Enables benchmarking for the engine. | |
| // | |
| // @param settings The engine settings. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_enable_benchmark( | |
| LiteRtLmEngineSettings* settings); | |
| // Sets the number of prefill tokens for benchmarking. | |
| // | |
| // @param settings The engine settings. | |
| // @param num_prefill_tokens The number of prefill tokens. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_set_num_prefill_tokens( | |
| LiteRtLmEngineSettings* settings, int num_prefill_tokens); | |
| // Sets the number of decode tokens for benchmarking. | |
| // | |
| // @param settings The engine settings. | |
| // @param num_decode_tokens The number of decode tokens. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_settings_set_num_decode_tokens( | |
| LiteRtLmEngineSettings* settings, int num_decode_tokens); | |
| // Creates a LiteRT LM Engine from the given settings. The caller is responsible | |
| // for destroying the engine using `litert_lm_engine_delete`. | |
| // | |
| // @param settings The engine settings. | |
| // @return A pointer to the created engine, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmEngine* litert_lm_engine_create(const LiteRtLmEngineSettings* settings); | |
| // Destroys a LiteRT LM Engine. | |
| // | |
| // @param engine The engine to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_engine_delete(LiteRtLmEngine* engine); | |
| // Creates a LiteRT LM Session. The caller is responsible for destroying the | |
| // session using `litert_lm_session_delete`. | |
| // | |
| // @param engine The engine to create the session from. | |
| // @param config The session config of the session. If NULL, use the default | |
| // session config. | |
| // @return A pointer to the created session, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmSession* litert_lm_engine_create_session(LiteRtLmEngine* engine, | |
| LiteRtLmSessionConfig* config); | |
| // Destroys a LiteRT LM Session. | |
| // | |
| // @param session The session to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_session_delete(LiteRtLmSession* session); | |
| // Generates content from the input prompt. | |
| // | |
| // @param session The session to use for generation. | |
| // @param inputs An array of InputData structs representing the multimodal | |
| // input. | |
| // @param num_inputs The number of InputData structs in the array. | |
| // @return A pointer to the responses, or NULL on failure. The caller is | |
| // responsible for deleting the responses using `litert_lm_responses_delete`. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmResponses* litert_lm_session_generate_content(LiteRtLmSession* session, | |
| const InputData* inputs, | |
| size_t num_inputs); | |
| // Destroys a LiteRT LM Responses object. | |
| // | |
| // @param responses The responses to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_responses_delete(LiteRtLmResponses* responses); | |
| // Returns the number of response candidates. | |
| // | |
| // @param responses The responses object. | |
| // @return The number of candidates. | |
| LITERT_LM_C_API_EXPORT | |
| int litert_lm_responses_get_num_candidates(const LiteRtLmResponses* responses); | |
| // Returns the response text at a given index. | |
| // | |
| // @param responses The responses object. | |
| // @param index The index of the response. | |
| // @return The response text. The returned string is owned by the `responses` | |
| // object and is valid only for its lifetime. Returns NULL if index is out of | |
| // bounds. | |
| LITERT_LM_C_API_EXPORT | |
| const char* litert_lm_responses_get_response_text_at( | |
| const LiteRtLmResponses* responses, int index); | |
| // Retrieves the benchmark information from the session. The caller is | |
| // responsible for destroying the benchmark info using | |
| // `litert_lm_benchmark_info_delete`. | |
| // | |
| // @param session The session to get the benchmark info from. | |
| // @return A pointer to the benchmark info, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmBenchmarkInfo* litert_lm_session_get_benchmark_info( | |
| LiteRtLmSession* session); | |
| // Destroys a LiteRT LM Benchmark Info object. | |
| // | |
| // @param benchmark_info The benchmark info to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_benchmark_info_delete(LiteRtLmBenchmarkInfo* benchmark_info); | |
| // Returns the time to the first token in seconds. | |
| // | |
| // Note that the first time to token doesn't include the time for | |
| // initialization. It is the sum of the prefill time for the first turn and | |
| // the time spent for decoding the first token. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @return The time to the first token in seconds. | |
| LITERT_LM_C_API_EXPORT | |
| double litert_lm_benchmark_info_get_time_to_first_token( | |
| const LiteRtLmBenchmarkInfo* benchmark_info); | |
| // Returns the total initialization time in seconds. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @return The total initialization time in seconds. | |
| LITERT_LM_C_API_EXPORT | |
| double litert_lm_benchmark_info_get_total_init_time_in_second( | |
| const LiteRtLmBenchmarkInfo* benchmark_info); | |
| // Returns the number of prefill turns. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @return The number of prefill turns. | |
| LITERT_LM_C_API_EXPORT | |
| int litert_lm_benchmark_info_get_num_prefill_turns( | |
| const LiteRtLmBenchmarkInfo* benchmark_info); | |
| // Returns the number of decode turns. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @return The number of decode turns. | |
| LITERT_LM_C_API_EXPORT | |
| int litert_lm_benchmark_info_get_num_decode_turns( | |
| const LiteRtLmBenchmarkInfo* benchmark_info); | |
| // Returns the prefill token count at a given turn index. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @param index The index of the prefill turn. | |
| // @return The prefill token count. | |
| LITERT_LM_C_API_EXPORT | |
| int litert_lm_benchmark_info_get_prefill_token_count_at( | |
| const LiteRtLmBenchmarkInfo* benchmark_info, int index); | |
| // Returns the decode token count at a given turn index. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @param index The index of the decode turn. | |
| // @return The decode token count. | |
| LITERT_LM_C_API_EXPORT | |
| int litert_lm_benchmark_info_get_decode_token_count_at( | |
| const LiteRtLmBenchmarkInfo* benchmark_info, int index); | |
| // Returns the prefill tokens per second at a given turn index. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @param index The index of the prefill turn. | |
| // @return The prefill tokens per second. | |
| LITERT_LM_C_API_EXPORT | |
| double litert_lm_benchmark_info_get_prefill_tokens_per_sec_at( | |
| const LiteRtLmBenchmarkInfo* benchmark_info, int index); | |
| // Returns the decode tokens per second at a given turn index. | |
| // | |
| // @param benchmark_info The benchmark info object. | |
| // @param index The index of the decode turn. | |
| // @return The decode tokens per second. | |
| LITERT_LM_C_API_EXPORT | |
| double litert_lm_benchmark_info_get_decode_tokens_per_sec_at( | |
| const LiteRtLmBenchmarkInfo* benchmark_info, int index); | |
// Signature of the callback used to deliver streaming responses.
//
// Arguments passed to the callback:
//   callback_data  The user-defined pointer that was passed to the stream
//                  function.
//   chunk          The piece of text from the stream. Valid only for the
//                  duration of the call.
//   is_final       True if this is the last chunk in the stream.
//   error_msg      A null-terminated error message, or NULL on success.
typedef void (*LiteRtLmStreamCallback)(
    void* callback_data,
    const char* chunk,
    bool is_final,
    const char* error_msg);
| // Generates content from the input prompt and streams the response via a | |
| // callback. This is a non-blocking call that will invoke the callback from a | |
| // background thread for each chunk. | |
| // | |
| // @param session The session to use for generation. | |
| // @param inputs An array of InputData structs representing the multimodal | |
| // input. | |
| // @param num_inputs The number of InputData structs in the array. | |
| // @param callback The callback function to receive response chunks. | |
| // @param callback_data A pointer to user data that will be passed to the | |
| // callback. | |
| // @return 0 on success, non-zero on failure to start the stream. | |
| LITERT_LM_C_API_EXPORT | |
| int litert_lm_session_generate_content_stream(LiteRtLmSession* session, | |
| const InputData* inputs, | |
| size_t num_inputs, | |
| LiteRtLmStreamCallback callback, | |
| void* callback_data); | |
| // Creates a LiteRT LM Conversation. The caller is responsible for destroying | |
| // the conversation using `litert_lm_conversation_delete`. | |
| // | |
| // @param engine The engine to create the conversation from. | |
| // @param config The conversation config to use. If NULL, the default config | |
| // will be used. | |
| // @return A pointer to the created conversation, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmConversation* litert_lm_conversation_create( | |
| LiteRtLmEngine* engine, LiteRtLmConversationConfig* config); | |
| // Destroys a LiteRT LM Conversation. | |
| // | |
| // @param conversation The conversation to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_conversation_delete(LiteRtLmConversation* conversation); | |
| // Sends a message to the conversation and returns the response. | |
| // This is a blocking call. | |
| // | |
| // @param conversation The conversation to use. | |
| // @param message_json A JSON string representing the message to send. | |
| // @param extra_context A JSON string representing the extra context to use. | |
| // @return A pointer to the JSON response, or NULL on failure. The caller is | |
| // responsible for deleting the response using | |
| // `litert_lm_json_response_delete`. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmJsonResponse* litert_lm_conversation_send_message( | |
| LiteRtLmConversation* conversation, const char* message_json, | |
| const char* extra_context); | |
| // Destroys a LiteRT LM Json Response object. | |
| // | |
| // @param response The response to destroy. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_json_response_delete(LiteRtLmJsonResponse* response); | |
| // Returns the JSON response string from a response object. | |
| // | |
| // @param response The response object. | |
| // @return The response JSON string. The returned string is owned by the | |
| // `response` object and is valid only for its lifetime. Returns NULL if | |
| // response is NULL. | |
| LITERT_LM_C_API_EXPORT | |
| const char* litert_lm_json_response_get_string( | |
| const LiteRtLmJsonResponse* response); | |
| // Sends a message to the conversation and streams the response via a | |
| // callback. This is a non-blocking call that will invoke the callback from a | |
| // background thread for each chunk. | |
| // | |
| // @param conversation The conversation to use. | |
| // @param message_json A JSON string representing the message to send. | |
| // @param extra_context A JSON string representing the extra context to use. | |
| // @param callback The callback function to receive response chunks. | |
| // @param callback_data A pointer to user data that will be passed to the | |
| // callback. | |
| // @return 0 on success, non-zero on failure to start the stream. | |
| LITERT_LM_C_API_EXPORT | |
| int litert_lm_conversation_send_message_stream( | |
| LiteRtLmConversation* conversation, const char* message_json, | |
| const char* extra_context, LiteRtLmStreamCallback callback, | |
| void* callback_data); | |
| // Cancels the ongoing inference process, for asynchronous inference. | |
| // | |
| // @param conversation The conversation to cancel the inference for. | |
| LITERT_LM_C_API_EXPORT | |
| void litert_lm_conversation_cancel_process(LiteRtLmConversation* conversation); | |
| // Retrieves the benchmark information from the conversation. The caller is | |
| // responsible for destroying the benchmark info using | |
| // `litert_lm_benchmark_info_delete`. | |
| // | |
| // @param conversation The conversation to get the benchmark info from. | |
| // @return A pointer to the benchmark info, or NULL on failure. | |
| LITERT_LM_C_API_EXPORT | |
| LiteRtLmBenchmarkInfo* litert_lm_conversation_get_benchmark_info( | |
| LiteRtLmConversation* conversation); | |
| } // extern "C" | |