// Copyright 2025 The ODML Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_
#define THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// On Windows, __declspec(dllexport) is required to export functions from a
// .dll.
// https://learn.microsoft.com/en-us/cpp/cpp/using-dllimport-and-dllexport-in-cpp-classes?view=msvc-170
//
// _WIN32 is defined as 1 when the compilation target is 32-bit ARM, 64-bit ARM,
// x86, x64, or ARM64EC. Otherwise, undefined.
// https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros
//
// LITERT_LM_C_API_EXPORT prefixes the function declarations in this header.
// On non-Windows targets it expands to nothing.
// NOTE(review): there is no __declspec(dllimport) branch, so code that merely
// includes this header on Windows also sees dllexport - confirm this is
// intended.
#if defined(_WIN32)
#define LITERT_LM_C_API_EXPORT __declspec(dllexport)
#else
#define LITERT_LM_C_API_EXPORT
#endif

// Opaque handle types. Each struct below is only forward-declared in this
// header, so callers can hold and pass pointers to it but cannot access its
// members.

// Opaque type for the LiteRT LM Engine.
typedef struct LiteRtLmEngine LiteRtLmEngine;

// Opaque type for the LiteRT LM Session.
typedef struct LiteRtLmSession LiteRtLmSession;

// Opaque type for the LiteRT LM Responses.
typedef struct LiteRtLmResponses LiteRtLmResponses;

// Opaque type for the LiteRT LM Engine Settings.
typedef struct LiteRtLmEngineSettings LiteRtLmEngineSettings;

// Opaque type for the LiteRT LM Benchmark Info.
typedef struct LiteRtLmBenchmarkInfo LiteRtLmBenchmarkInfo;

// Opaque type for the LiteRT LM Conversation.
typedef struct LiteRtLmConversation LiteRtLmConversation;

// Opaque type for a JSON response.
typedef struct LiteRtLmJsonResponse LiteRtLmJsonResponse;

// Opaque type for the LiteRT LM Session Config.
typedef struct LiteRtLmSessionConfig LiteRtLmSessionConfig;

// Opaque type for the LiteRT LM Conversation Config.
typedef struct LiteRtLmConversationConfig LiteRtLmConversationConfig;

// Represents the type of sampler. Used as the `type` field of
// `LiteRtLmSamplerParams`.
typedef enum {
  // No sampler type specified.
  // NOTE(review): how an unspecified type is handled is not visible in this
  // header - confirm against the implementation.
  kTypeUnspecified = 0,
  // Probabilistically pick among the top k tokens.
  kTopK = 1,
  // Probabilistically pick among the tokens whose sum (presumably their
  // cumulative probability - confirm) is greater than or equal to p, after
  // first performing top-k sampling.
  kTopP = 2,
  // Pick the token with maximum logit (i.e., argmax).
  kGreedy = 3,
} Type;

// Parameters for the sampler. Passed by pointer to
// `litert_lm_session_config_set_sampler_params`.
typedef struct {
  // The sampling strategy to use (see `Type`).
  Type type;
  // NOTE(review): presumably the k used for top-k sampling - confirm.
  int32_t top_k;
  // NOTE(review): presumably the p threshold used for top-p sampling - confirm.
  float top_p;
  // NOTE(review): presumably the sampling temperature; the valid range is not
  // visible in this header - confirm.
  float temperature;
  // NOTE(review): presumably the seed of the sampler's random number
  // generator - confirm.
  int32_t seed;
} LiteRtLmSamplerParams;

// Creates a LiteRT LM Session Config.
// The caller is responsible for destroying the config using
// `litert_lm_session_config_delete`.
//
// The parameter list is spelled `(void)` so that this is a true prototype in
// C: before C23, `()` declares a function with unspecified parameters and
// disables argument checking. In C++ both spellings are equivalent.
//
// @return A pointer to the created config, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmSessionConfig* litert_lm_session_config_create(void);

// Sets the maximum number of output tokens per decode step for this session.
//
// @param config The config to modify (see `litert_lm_session_config_create`).
// @param max_output_tokens The maximum number of output tokens.
//   NOTE(review): the handling of zero or negative values is not documented
//   here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_session_config_set_max_output_tokens(
    LiteRtLmSessionConfig* config, int max_output_tokens);

// Sets the sampler parameters for this session config.
//
// @param config The config to modify (see `litert_lm_session_config_create`).
// @param sampler_params The sampler parameters to use. Taken through a
//   pointer-to-const, so the caller's struct is not modified.
//   NOTE(review): presumably copied during the call so the struct need not
//   outlive it - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_session_config_set_sampler_params(
    LiteRtLmSessionConfig* config, const LiteRtLmSamplerParams* sampler_params);

// Destroys a LiteRT LM Session Config. Counterpart of
// `litert_lm_session_config_create`.
//
// @param config The config to destroy.
//   NOTE(review): whether NULL is accepted is not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_session_config_delete(LiteRtLmSessionConfig* config);

// Creates a LiteRT LM Conversation Config.
// The caller is responsible for destroying the config using
// `litert_lm_conversation_config_delete`.
//
// @param engine The engine to use.
// @param session_config The session config to use. If NULL, default
//   session config will be used.
// @param system_message_json The system message in JSON format.
// @param tools_json The tools description in JSON array format.
// @param messages_json NOTE(review): this parameter was undocumented;
//   presumably the initial conversation messages in JSON format - confirm
//   against the implementation, including whether NULL is accepted.
// @param enable_constrained_decoding Whether to enable constrained decoding.
// @return A pointer to the created config, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmConversationConfig* litert_lm_conversation_config_create(
    LiteRtLmEngine* engine, const LiteRtLmSessionConfig* session_config,
    const char* system_message_json, const char* tools_json,
    const char* messages_json, bool enable_constrained_decoding);

// Destroys a LiteRT LM Conversation Config. Counterpart of
// `litert_lm_conversation_config_create`.
//
// @param config The config to destroy.
//   NOTE(review): whether NULL is accepted is not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_conversation_config_delete(LiteRtLmConversationConfig* config);

// Sets the minimum log level for the LiteRT LM library.
//
// @param level The minimum log level: 0=INFO, 1=WARNING, 2=ERROR, 3=FATAL.
//   NOTE(review): the handling of values outside 0..3 is not documented here -
//   confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_set_min_log_level(int level);

// Represents the type of input data carried by an `InputData` entry.
// No explicit values are assigned, so the enumerators take the values 0..4 in
// declaration order.
typedef enum {
  // UTF-8 text.
  kInputText,
  // Raw image bytes.
  kInputImage,
  // NOTE(review): presumably marks the end of image input - confirm; the
  // expected `data`/`size` for this type are not documented here.
  kInputImageEnd,
  // Raw audio bytes.
  kInputAudio,
  // NOTE(review): presumably marks the end of audio input - confirm; the
  // expected `data`/`size` for this type are not documented here.
  kInputAudioEnd,
} InputDataType;

// Represents a single piece of input data. Arrays of these are passed to
// `litert_lm_session_generate_content` and
// `litert_lm_session_generate_content_stream`.
typedef struct {
  // Selects how `data` is interpreted.
  InputDataType type;
  // The data pointer. The interpretation depends on the `type`.
  // For kInputText, it's a UTF-8 string.
  // For kInputImage and kInputAudio, it's a pointer to the raw bytes.
  // NOTE(review): the expected value for kInputImageEnd / kInputAudioEnd is
  // not documented here - confirm.
  const void* data;
  // The size of the data in bytes.
  // NOTE(review): for kInputText, whether a terminating NUL is counted is not
  // stated here - confirm.
  size_t size;
} InputData;

// Creates LiteRT LM Engine Settings. The caller is responsible for destroying
// the settings using `litert_lm_engine_settings_delete`.
//
// @param model_path The path to the model file.
// @param backend_str The backend to use (e.g., "cpu", "gpu").
//   NOTE(review): the full set of accepted strings is not listed here -
//   confirm against the implementation.
// @param vision_backend_str The vision backend to use, or NULL if not set.
// @param audio_backend_str The audio backend to use, or NULL if not set.
// @return A pointer to the created settings, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmEngineSettings* litert_lm_engine_settings_create(
    const char* model_path, const char* backend_str,
    const char* vision_backend_str, const char* audio_backend_str);

// Destroys LiteRT LM Engine Settings. Counterpart of
// `litert_lm_engine_settings_create`.
//
// @param settings The settings to destroy.
//   NOTE(review): whether NULL is accepted is not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_delete(LiteRtLmEngineSettings* settings);

// Sets the maximum number of tokens for the engine.
//
// @param settings The engine settings to modify (see
//   `litert_lm_engine_settings_create`).
// @param max_num_tokens The maximum number of tokens.
//   NOTE(review): whether this limit covers input tokens, output tokens, or
//   both is not stated here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_max_num_tokens(
    LiteRtLmEngineSettings* settings, int max_num_tokens);

// Sets the cache directory for the engine.
//
// @param settings The engine settings to modify (see
//   `litert_lm_engine_settings_create`).
// @param cache_dir The cache directory, as a C string.
//   NOTE(review): what is cached there and whether the directory must already
//   exist are not stated here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_cache_dir(LiteRtLmEngineSettings* settings,
                                             const char* cache_dir);

// Sets the activation data type.
//
// @param settings The engine settings to modify (see
//   `litert_lm_engine_settings_create`).
// @param activation_data_type_int The activation data type, passed as a plain
//   integer. See `ActivationDataType` in executor_settings_base.h for the
//   possible values (e.g., 0 for F32, 1 for F16, 2 for I16, 3 for I8).
//   NOTE(review): the handling of values outside that enum is not documented
//   here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_activation_data_type(
    LiteRtLmEngineSettings* settings, int activation_data_type_int);

// Sets the prefill chunk size for the engine. Only applicable for the CPU
// backend with dynamic models.
//
// @param settings The engine settings to modify (see
//   `litert_lm_engine_settings_create`).
// @param prefill_chunk_size The prefill chunk size.
//   NOTE(review): the unit (presumably tokens) and the effect on other
//   backends are not stated here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_prefill_chunk_size(
    LiteRtLmEngineSettings* settings, int prefill_chunk_size);

// Enables benchmarking for the engine. Benchmark results are read back through
// `litert_lm_session_get_benchmark_info` or
// `litert_lm_conversation_get_benchmark_info`.
// NOTE(review): presumably those getters fail unless this was called before
// the engine was created - confirm.
//
// @param settings The engine settings to modify (see
//   `litert_lm_engine_settings_create`).
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_enable_benchmark(
    LiteRtLmEngineSettings* settings);

// Sets the number of prefill tokens for benchmarking.
// NOTE(review): whether `litert_lm_engine_settings_enable_benchmark` must be
// called as well is not stated here - confirm.
//
// @param settings The engine settings to modify (see
//   `litert_lm_engine_settings_create`).
// @param num_prefill_tokens The number of prefill tokens.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_num_prefill_tokens(
    LiteRtLmEngineSettings* settings, int num_prefill_tokens);

// Sets the number of decode tokens for benchmarking.
// NOTE(review): whether `litert_lm_engine_settings_enable_benchmark` must be
// called as well is not stated here - confirm.
//
// @param settings The engine settings to modify (see
//   `litert_lm_engine_settings_create`).
// @param num_decode_tokens The number of decode tokens.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_settings_set_num_decode_tokens(
    LiteRtLmEngineSettings* settings, int num_decode_tokens);

// Creates a LiteRT LM Engine from the given settings. The caller is responsible
// for destroying the engine using `litert_lm_engine_delete`.
//
// @param settings The engine settings (see `litert_lm_engine_settings_create`).
//   Taken through a pointer-to-const, so the settings object is not modified.
//   NOTE(review): whether the settings may be deleted right after this call
//   returns is not stated here - confirm.
// @return A pointer to the created engine, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmEngine* litert_lm_engine_create(const LiteRtLmEngineSettings* settings);

// Destroys a LiteRT LM Engine. Counterpart of `litert_lm_engine_create`.
//
// @param engine The engine to destroy.
//   NOTE(review): whether sessions and conversations created from this engine
//   must be destroyed first, and whether NULL is accepted, are not documented
//   here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_engine_delete(LiteRtLmEngine* engine);

// Creates a LiteRT LM Session. The caller is responsible for destroying the
// session using `litert_lm_session_delete`.
//
// @param engine The engine to create the session from.
// @param config The session config of the session. If NULL, use the default
//   session config.
//   NOTE(review): `config` is a non-const pointer here, whereas
//   `litert_lm_conversation_config_create` takes a const session config;
//   whether the config is modified or retained by the session is not visible
//   in this header - confirm.
// @return A pointer to the created session, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmSession* litert_lm_engine_create_session(LiteRtLmEngine* engine,
                                                 LiteRtLmSessionConfig* config);

// Destroys a LiteRT LM Session. Counterpart of
// `litert_lm_engine_create_session`.
//
// @param session The session to destroy.
//   NOTE(review): behaviour while a stream started with
//   `litert_lm_session_generate_content_stream` is still running, and whether
//   NULL is accepted, are not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_session_delete(LiteRtLmSession* session);

// Generates content from the input prompt.
// NOTE(review): presumably a blocking call, since the responses are returned
// directly (the streaming variant is
// `litert_lm_session_generate_content_stream`) - confirm.
//
// @param session The session to use for generation.
// @param inputs An array of InputData structs representing the multimodal
//   input.
// @param num_inputs The number of InputData structs in the array.
// @return A pointer to the responses, or NULL on failure. The caller is
//   responsible for deleting the responses using `litert_lm_responses_delete`.
LITERT_LM_C_API_EXPORT
LiteRtLmResponses* litert_lm_session_generate_content(LiteRtLmSession* session,
                                                      const InputData* inputs,
                                                      size_t num_inputs);
// Destroys a LiteRT LM Responses object, such as one returned by
// `litert_lm_session_generate_content`. Strings previously obtained from
// `litert_lm_responses_get_response_text_at` are owned by the responses object
// and must not be used afterwards.
//
// @param responses The responses to destroy.
//   NOTE(review): whether NULL is accepted is not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_responses_delete(LiteRtLmResponses* responses);

// Returns the number of response candidates.
//
// @param responses The responses object.
// @return The number of candidates.
//   NOTE(review): presumably this bounds the valid `index` values for
//   `litert_lm_responses_get_response_text_at`; the value returned for a NULL
//   `responses` is not documented here - confirm.
LITERT_LM_C_API_EXPORT
int litert_lm_responses_get_num_candidates(const LiteRtLmResponses* responses);

// Returns the response text at a given index.
//
// @param responses The responses object.
// @param index The index of the response.
// @return The response text. The returned string is owned by the `responses`
//   object and is valid only for its lifetime, i.e. until
//   `litert_lm_responses_delete` is called on it; the caller must not free it.
//   Returns NULL if index is out of bounds.
LITERT_LM_C_API_EXPORT
const char* litert_lm_responses_get_response_text_at(
    const LiteRtLmResponses* responses, int index);

// Retrieves the benchmark information from the session. The caller is
// responsible for destroying the benchmark info using
// `litert_lm_benchmark_info_delete`.
// NOTE(review): presumably requires benchmarking to have been enabled with
// `litert_lm_engine_settings_enable_benchmark` - confirm.
//
// @param session The session to get the benchmark info from.
// @return A pointer to the benchmark info, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmBenchmarkInfo* litert_lm_session_get_benchmark_info(
    LiteRtLmSession* session);

// Destroys a LiteRT LM Benchmark Info object, such as one returned by
// `litert_lm_session_get_benchmark_info` or
// `litert_lm_conversation_get_benchmark_info`.
//
// @param benchmark_info The benchmark info to destroy.
//   NOTE(review): whether NULL is accepted is not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_benchmark_info_delete(LiteRtLmBenchmarkInfo* benchmark_info);

// Returns the time to the first token in seconds.
//
// Note that the time to first token doesn't include the time for
// initialization. It is the sum of the prefill time for the first turn and
// the time spent decoding the first token.
//
// @param benchmark_info The benchmark info object.
// @return The time to the first token in seconds.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_time_to_first_token(
    const LiteRtLmBenchmarkInfo* benchmark_info);

// Returns the total initialization time in seconds.
// NOTE(review): which initialization phases are included in the total is not
// stated here - confirm.
//
// @param benchmark_info The benchmark info object.
// @return The total initialization time in seconds.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_total_init_time_in_second(
    const LiteRtLmBenchmarkInfo* benchmark_info);

// Returns the number of prefill turns.
// NOTE(review): presumably this bounds the valid `index` values for
// `litert_lm_benchmark_info_get_prefill_token_count_at` and
// `litert_lm_benchmark_info_get_prefill_tokens_per_sec_at` - confirm.
//
// @param benchmark_info The benchmark info object.
// @return The number of prefill turns.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_num_prefill_turns(
    const LiteRtLmBenchmarkInfo* benchmark_info);

// Returns the number of decode turns.
// NOTE(review): presumably this bounds the valid `index` values for
// `litert_lm_benchmark_info_get_decode_token_count_at` and
// `litert_lm_benchmark_info_get_decode_tokens_per_sec_at` - confirm.
//
// @param benchmark_info The benchmark info object.
// @return The number of decode turns.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_num_decode_turns(
    const LiteRtLmBenchmarkInfo* benchmark_info);

// Returns the prefill token count at a given turn index.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the prefill turn.
//   NOTE(review): the result for an out-of-range index is not documented
//   here - confirm.
// @return The prefill token count.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_prefill_token_count_at(
    const LiteRtLmBenchmarkInfo* benchmark_info, int index);

// Returns the decode token count at a given turn index.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the decode turn.
//   NOTE(review): the result for an out-of-range index is not documented
//   here - confirm.
// @return The decode token count.
LITERT_LM_C_API_EXPORT
int litert_lm_benchmark_info_get_decode_token_count_at(
    const LiteRtLmBenchmarkInfo* benchmark_info, int index);

// Returns the prefill tokens per second at a given turn index.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the prefill turn.
//   NOTE(review): the result for an out-of-range index is not documented
//   here - confirm.
// @return The prefill tokens per second.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_prefill_tokens_per_sec_at(
    const LiteRtLmBenchmarkInfo* benchmark_info, int index);

// Returns the decode tokens per second at a given turn index.
//
// @param benchmark_info The benchmark info object.
// @param index The index of the decode turn.
//   NOTE(review): the result for an out-of-range index is not documented
//   here - confirm.
// @return The decode tokens per second.
LITERT_LM_C_API_EXPORT
double litert_lm_benchmark_info_get_decode_tokens_per_sec_at(
    const LiteRtLmBenchmarkInfo* benchmark_info, int index);

// Callback for streaming responses. Used by
// `litert_lm_session_generate_content_stream` and
// `litert_lm_conversation_send_message_stream`, which invoke it from a
// background thread.
//
// @param callback_data The pointer to user-defined data that was passed to the
//   stream function.
// @param chunk The piece of text from the stream. It's only valid for the
//   duration of the call, so copy it if it is needed afterwards.
// @param is_final True if this is the last chunk in the stream.
// @param error_msg A null-terminated string with an error message, or NULL on
//   success.
typedef void (*LiteRtLmStreamCallback)(void* callback_data, const char* chunk,
                                       bool is_final, const char* error_msg);

// Generates content from the input prompt and streams the response via a
// callback. This is a non-blocking call that will invoke the callback from a
// background thread for each chunk.
//
// @param session The session to use for generation.
// @param inputs An array of InputData structs representing the multimodal
//   input.
//   NOTE(review): whether `inputs` (and the buffers it points to) must stay
//   valid after this call returns is not stated here - confirm.
// @param num_inputs The number of InputData structs in the array.
// @param callback The callback function to receive response chunks.
// @param callback_data A pointer to user data that will be passed to the
//   callback. Because the callback runs on a background thread, the pointed-to
//   data is accessed from that thread.
// @return 0 on success, non-zero on failure to start the stream.
LITERT_LM_C_API_EXPORT
int litert_lm_session_generate_content_stream(LiteRtLmSession* session,
                                              const InputData* inputs,
                                              size_t num_inputs,
                                              LiteRtLmStreamCallback callback,
                                              void* callback_data);

// Creates a LiteRT LM Conversation. The caller is responsible for destroying
// the conversation using `litert_lm_conversation_delete`.
//
// @param engine The engine to create the conversation from.
// @param config The conversation config to use (see
//   `litert_lm_conversation_config_create`). If NULL, the default config
//   will be used.
//   NOTE(review): whether `config` may be deleted right after this call
//   returns is not stated here - confirm.
// @return A pointer to the created conversation, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmConversation* litert_lm_conversation_create(
    LiteRtLmEngine* engine, LiteRtLmConversationConfig* config);

// Destroys a LiteRT LM Conversation. Counterpart of
// `litert_lm_conversation_create`.
//
// @param conversation The conversation to destroy.
//   NOTE(review): behaviour while a stream started with
//   `litert_lm_conversation_send_message_stream` is still running, and whether
//   NULL is accepted, are not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_conversation_delete(LiteRtLmConversation* conversation);

// Sends a message to the conversation and returns the response.
// This is a blocking call; the streaming variant is
// `litert_lm_conversation_send_message_stream`.
//
// @param conversation The conversation to use.
// @param message_json A JSON string representing the message to send.
// @param extra_context A JSON string representing the extra context to use.
//   NOTE(review): whether NULL is accepted when there is no extra context is
//   not stated here - confirm.
// @return A pointer to the JSON response, or NULL on failure. The caller is
//   responsible for deleting the response using
//   `litert_lm_json_response_delete`.
LITERT_LM_C_API_EXPORT
LiteRtLmJsonResponse* litert_lm_conversation_send_message(
    LiteRtLmConversation* conversation, const char* message_json,
    const char* extra_context);

// Destroys a LiteRT LM JSON Response object, such as one returned by
// `litert_lm_conversation_send_message`. A string previously obtained from
// `litert_lm_json_response_get_string` is owned by the response object and
// must not be used afterwards.
//
// @param response The response to destroy.
//   NOTE(review): whether NULL is accepted is not documented here - confirm.
LITERT_LM_C_API_EXPORT
void litert_lm_json_response_delete(LiteRtLmJsonResponse* response);

// Returns the JSON response string from a response object.
//
// @param response The response object.
// @return The response JSON string. The returned string is owned by the
//   `response` object and is valid only for its lifetime, i.e. until
//   `litert_lm_json_response_delete` is called on it; the caller must not free
//   it. Returns NULL if response is NULL.
LITERT_LM_C_API_EXPORT
const char* litert_lm_json_response_get_string(
    const LiteRtLmJsonResponse* response);

// Sends a message to the conversation and streams the response via a
// callback. This is a non-blocking call that will invoke the callback from a
// background thread for each chunk. An ongoing inference can be cancelled with
// `litert_lm_conversation_cancel_process`.
//
// @param conversation The conversation to use.
// @param message_json A JSON string representing the message to send.
// @param extra_context A JSON string representing the extra context to use.
//   NOTE(review): whether NULL is accepted, and whether the input strings must
//   stay valid after this call returns, are not stated here - confirm.
// @param callback The callback function to receive response chunks.
// @param callback_data A pointer to user data that will be passed to the
//   callback. Because the callback runs on a background thread, the pointed-to
//   data is accessed from that thread.
// @return 0 on success, non-zero on failure to start the stream.
LITERT_LM_C_API_EXPORT
int litert_lm_conversation_send_message_stream(
    LiteRtLmConversation* conversation, const char* message_json,
    const char* extra_context, LiteRtLmStreamCallback callback,
    void* callback_data);

// Cancels the ongoing inference process of a conversation. Intended for
// asynchronous inference, e.g. one started with
// `litert_lm_conversation_send_message_stream`.
// NOTE(review): how the stream callback is notified of the cancellation, and
// the behaviour when no inference is running, are not documented here -
// confirm.
//
// @param conversation The conversation to cancel the inference for.
LITERT_LM_C_API_EXPORT
void litert_lm_conversation_cancel_process(LiteRtLmConversation* conversation);

// Retrieves the benchmark information from the conversation. The caller is
// responsible for destroying the benchmark info using
// `litert_lm_benchmark_info_delete`.
// NOTE(review): presumably requires benchmarking to have been enabled with
// `litert_lm_engine_settings_enable_benchmark` - confirm.
//
// @param conversation The conversation to get the benchmark info from.
// @return A pointer to the benchmark info, or NULL on failure.
LITERT_LM_C_API_EXPORT
LiteRtLmBenchmarkInfo* litert_lm_conversation_get_benchmark_info(
    LiteRtLmConversation* conversation);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_