	// Retrieved from Spaces: FINAL-Bench / LiteRT-LM (status: Running).
	// File: LiteRT-LM/runtime/conversation/conversation.h (28.9 kB).
	// Last commit: 5f923cd (verified, 6 days before retrieval) by SeaWolf-AI,
	// "Upload full LiteRT-LM codebase".

	// Copyright 2025 The ODML Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#ifndef THIRD_PARTY_ODML_LITERT_LM_RUNTIME_CONVERSATION_CONVERSATION_H_
	#define THIRD_PARTY_ODML_LITERT_LM_RUNTIME_CONVERSATION_CONVERSATION_H_

	#include <memory>
	#include <optional>
	#include <string>
	#include <utility>
	#include <vector>

	#include "absl/base/thread_annotations.h" // from @com_google_absl
	#include "absl/container/flat_hash_map.h" // from @com_google_absl
	#include "absl/functional/any_invocable.h" // from @com_google_absl
	#include "absl/status/status.h" // from @com_google_absl
	#include "absl/status/statusor.h" // from @com_google_absl
	#include "absl/strings/string_view.h" // from @com_google_absl
	#include "absl/synchronization/mutex.h" // from @com_google_absl
	#include "absl/types/span.h" // from @com_google_absl
	#include "nlohmann/json_fwd.hpp" // from @nlohmann_json
	#include "runtime/components/constrained_decoding/constraint.h"
	#include "runtime/components/constrained_decoding/constraint_provider.h"
	#include "runtime/components/constrained_decoding/constraint_provider_config.h"
	#include "runtime/components/prompt_template.h"
	#include "runtime/conversation/io_types.h"
	#include "runtime/conversation/model_data_processor/config_registry.h"
	#include "runtime/conversation/model_data_processor/model_data_processor.h"
	#include "runtime/engine/engine.h"
	#include "runtime/engine/engine_settings.h"
	#include "runtime/engine/io_types.h"
	#include "runtime/util/status_macros.h"

	namespace litert::lm {

	// Configuration for the Conversation instance. This class is used to initialize
	// the Conversation instance.
	//
	// To create a ConversationConfig, use ConversationConfig::CreateDefault() to
	// create a default config, or use the ConversationConfig::Builder() to build a
	// custom config.
	//
	// Note: Consider removing ConversationConfig and using a ConversationBuilder
	// to build Conversation instead.
	class ConversationConfig {
	 public:
	  // Creates a default ConversationConfig from the given Engine.
	  // Args:
	  // - `engine`: The Engine instance to be used for creating the default
	  //   config.
	  static absl::StatusOr<ConversationConfig> CreateDefault(const Engine& engine);

	  // Returns the SessionConfig used for creating the ConversationConfig.
	  const SessionConfig& GetSessionConfig() const { return session_config_; }

	  // Returns the Preface used for creating the ConversationConfig.
	  const Preface& GetPreface() const { return preface_; }

	  // Returns the PromptTemplate used for creating the ConversationConfig.
	  const PromptTemplate& GetPromptTemplate() const { return prompt_template_; }

	  // Returns the DataProcessorConfig used for creating the ConversationConfig.
	  const DataProcessorConfig& GetProcessorConfig() const {
	    return processor_config_;
	  }

	  // Returns whether constrained decoding is enabled.
	  bool constrained_decoding_enabled() const {
	    return constrained_decoding_enabled_;
	  }

	  // Returns whether the preface should be prefilled when the Conversation is
	  // created. This will make the first response faster, but take longer to
	  // initialize.
	  bool prefill_preface_on_init() const { return prefill_preface_on_init_; }

	  // Returns the channels configured for the conversation.
	  const std::vector<Channel>& GetChannels() const { return channels_; }

	  // Returns whether to filter channel content from the KV cache.
	  bool filter_channel_content_from_kv_cache() const {
	    return filter_channel_content_from_kv_cache_;
	  }

	  // Returns the constraint provider config used for constrained decoding.
	  // The returned optional may be std::nullopt.
	  const std::optional<ConstraintProviderConfig>& constraint_provider_config()
	      const {
	    return constraint_provider_config_;
	  }

	  // Builder class for ConversationConfig.
	  //
	  // Example usage:
	  //   // Create a ConversationConfig instance using the Builder.
	  //   ASSIGN_OR_RETURN(auto conversation_config,
	  //                    ConversationConfig::Builder()
	  //                        .SetEnableConstrainedDecoding(true)
	  //                        .SetPrefillPrefaceOnInit(true)
	  //                        .Build(*engine));
	  class Builder {
	   public:
	    // Sets the SessionConfig to be used for creating the ConversationConfig.
	    Builder& SetSessionConfig(const SessionConfig& session_config) {
	      session_config_ = session_config;
	      return *this;
	    }

	    // Sets the Preface for the conversation. The Preface provides
	    // the initial background for the conversation, tool uses and extra
	    // context for the conversation. If not provided, the conversation will
	    // start with an empty Preface.
	    Builder& SetPreface(const Preface& preface) {
	      preface_ = preface;
	      return *this;
	    }

	    // Sets the PromptTemplate instance to be used for the conversation. If
	    // not provided, the conversation will use the template read from the model
	    // metadata.
	    Builder& SetOverwritePromptTemplate(
	        const PromptTemplate& overwrite_prompt_template) {
	      overwrite_prompt_template_ = overwrite_prompt_template;
	      return *this;
	    }

	    // Sets the configuration for the model data processor. If not provided,
	    // the default config for the model type's data processor will be used.
	    // Most of the time, the users don't need to provide the data processor
	    // config.
	    Builder& SetOverwriteProcessorConfig(
	        const DataProcessorConfig& overwrite_processor_config) {
	      overwrite_processor_config_ = overwrite_processor_config;
	      return *this;
	    }

	    // Sets whether to enable constrained decoding. If true, constrained
	    // decoding will be used, primarily for function calling.
	    Builder& SetEnableConstrainedDecoding(bool enable_constrained_decoding) {
	      enable_constrained_decoding_ = enable_constrained_decoding;
	      return *this;
	    }

	    // Sets whether to prefill the preface on init. If true, the preface will
	    // be prefilled on init, which will make the first response faster, but
	    // take longer to initialize.
	    Builder& SetPrefillPrefaceOnInit(bool prefill_preface_on_init) {
	      prefill_preface_on_init_ = prefill_preface_on_init;
	      return *this;
	    }

	    // Sets the configuration for the constraint provider.
	    Builder& SetConstraintProviderConfig(
	        const ConstraintProviderConfig& constraint_provider_config) {
	      constraint_provider_config_ = constraint_provider_config;
	      return *this;
	    }

	    // Sets the channels for the conversation.
	    Builder& SetChannels(const std::vector<Channel>& channels) {
	      channels_ = channels;
	      return *this;
	    }

	    // Sets whether to filter channel content from the KV cache. This is useful
	    // when the model responds with "channel" content, e.g. thinking/reasoning
	    // tokens, that should not be persisted in the KV cache.
	    Builder& SetFilterChannelContentFromKvCache(
	        bool filter_channel_content_from_kv_cache) {
	      filter_channel_content_from_kv_cache_ =
	          filter_channel_content_from_kv_cache;
	      return *this;
	    }

	    // Builds a ConversationConfig by passing the values collected so far to
	    // ConversationConfig::CreateInternal. The Builder itself is only read
	    // (its values are copied), so this can be called on a const Builder and
	    // the Builder can be reused afterwards.
	    // Args:
	    // - `engine`: The Engine instance forwarded to CreateInternal.
	    // Returns:
	    // - The ConversationConfig, or the error status from CreateInternal.
	    absl::StatusOr<ConversationConfig> Build(const Engine& engine) const {
	      return ConversationConfig::CreateInternal(
	          engine, session_config_, preface_, overwrite_prompt_template_,
	          overwrite_processor_config_, enable_constrained_decoding_,
	          prefill_preface_on_init_, constraint_provider_config_, channels_,
	          filter_channel_content_from_kv_cache_);
	    }

	    // Same as Build(), but returns the ConversationConfig wrapped in a
	    // std::unique_ptr. Any error from Build() is propagated unchanged.
	    absl::StatusOr<std::unique_ptr<ConversationConfig>> BuildUnique(
	        const Engine& engine) const {
	      ASSIGN_OR_RETURN(ConversationConfig config, Build(engine));
	      return std::make_unique<ConversationConfig>(std::move(config));
	    }

	   private:
	    // Values collected by the setters. Optional fields left unset are passed
	    // to CreateInternal as std::nullopt.
	    SessionConfig session_config_ = SessionConfig::CreateDefault();
	    std::optional<Preface> preface_;
	    std::optional<PromptTemplate> overwrite_prompt_template_;
	    std::optional<DataProcessorConfig> overwrite_processor_config_;
	    bool enable_constrained_decoding_ = false;
	    bool prefill_preface_on_init_ = false;
	    std::optional<ConstraintProviderConfig> constraint_provider_config_;
	    std::optional<std::vector<Channel>> channels_ = std::nullopt;
	    bool filter_channel_content_from_kv_cache_ = false;
	  };

	 private:
	  // Creates a ConversationConfig.
	  // Args:
	  // - `engine`: The Engine instance to be used to validate the SessionConfig.
	  // - `session_config`: The SessionConfig to be used for creating the
	  //   ConversationConfig.
	  // - `preface`: Optional Preface for the conversation. The Preface provides
	  //   the initial background for the conversation, tool uses and extra
	  //   context for the conversation. If not provided, the conversation will
	  //   start with an empty Preface.
	  // - `overwrite_prompt_template`: Optional PromptTemplate instance to be used
	  //   for the conversation. If not provided, the conversation will use the
	  //   template read from the model metadata "jinja_prompt_template". If not
	  //   provided, LiteRT-LM will try to generate a default one based on the llm
	  //   model type.
	  // - `overwrite_processor_config`: Optional configuration for the model data
	  //   processor, if not provided, the default config for the model type's
	  //   data processor will be used. Most of the time, the users don't need to
	  //   provide the data processor config.
	  // - `enable_constrained_decoding`: Whether to enable constrained decoding. If
	  //   true, constrained decoding will be used, primarily for function
	  //   calling.
	  // - `prefill_preface_on_init`: Whether to prefill the preface on init. If
	  //   true, the preface will be prefilled on init, which will make the first
	  //   response faster, but take longer to initialize.
	  // - `constraint_provider_config`: Optional configuration for the constraint
	  //   provider.
	  // - `channels`: The channels configured for the conversation.
	  // - `filter_channel_content_from_kv_cache`: Whether to filter channel
	  //   content from the KV cache. This is useful when the model responds with
	  //   "channel" content, e.g. thinking/reasoning tokens, that should not be
	  //   persisted in the KV cache.
	  static absl::StatusOr<ConversationConfig> CreateInternal(
	      const Engine& engine, const SessionConfig& session_config,
	      std::optional<Preface> preface = std::nullopt,
	      std::optional<PromptTemplate> overwrite_prompt_template = std::nullopt,
	      std::optional<DataProcessorConfig> overwrite_processor_config =
	          std::nullopt,
	      bool enable_constrained_decoding = false,
	      bool prefill_preface_on_init = false,
	      std::optional<ConstraintProviderConfig> constraint_provider_config =
	          std::nullopt,
	      std::optional<std::vector<Channel>> channels = std::nullopt,
	      bool filter_channel_content_from_kv_cache = false);

	  // Constructs a ConversationConfig from already-resolved values. All
	  // non-trivial arguments are taken by value and moved into the members.
	  explicit ConversationConfig(SessionConfig session_config, Preface preface,
	                              PromptTemplate prompt_template,
	                              DataProcessorConfig processor_config,
	                              bool constrained_decoding_enabled = false,
	                              bool prefill_preface_on_init = false,
	                              std::optional<ConstraintProviderConfig>
	                                  constraint_provider_config = std::nullopt,
	                              std::vector<Channel> channels = {},
	                              bool filter_channel_content_from_kv_cache = false)
	      : session_config_(std::move(session_config)),
	        preface_(std::move(preface)),
	        prompt_template_(std::move(prompt_template)),
	        processor_config_(std::move(processor_config)),
	        constrained_decoding_enabled_(constrained_decoding_enabled),
	        prefill_preface_on_init_(prefill_preface_on_init),
	        constraint_provider_config_(std::move(constraint_provider_config)),
	        channels_(std::move(channels)),
	        filter_channel_content_from_kv_cache_(
	            filter_channel_content_from_kv_cache) {}

	  // Every member below is set by the constructor's initializer list.
	  SessionConfig session_config_;
	  Preface preface_;
	  PromptTemplate prompt_template_;
	  DataProcessorConfig processor_config_;
	  bool constrained_decoding_enabled_;
	  bool prefill_preface_on_init_;
	  std::optional<ConstraintProviderConfig> constraint_provider_config_;
	  std::vector<Channel> channels_;
	  bool filter_channel_content_from_kv_cache_;
	};

	// Optional arguments for sending a message to the LLM.
	struct OptionalArgs {
	  // Set to true when more messages will follow this one. In that case only
	  // the prefill stage of the LLM runs for this message and the decode stage
	  // is skipped, which lets callers append several messages to the
	  // conversation while generating a response only once.
	  //
	  // Leave this false to run the decode stage as well. To run the decode stage
	  // alone, leave this false and send a message with empty content.
	  //
	  // Note: this option is only valid for model templates and
	  // ModelDataProcessor that supports single turn prompt rendering.
	  //
	  // Example usages:
	  //
	  // 1) Append messages without triggering the decode stage:
	  //
	  //   ASSERT_OK(conversation->SendMessage(
	  //       JsonMessage{{"role", "user"}, {"content", "Hello world!"}},
	  //       {.has_pending_message = true}));
	  //
	  //   ASSERT_OK(conversation->SendMessage(
	  //       JsonMessage{{"role", "user"},
	  //                   {"content", " This is a long message."}},
	  //       {.has_pending_message = true}));
	  //
	  // 2) Finish with has_pending_message set to false; the decode stage is
	  //    triggered and the decode result is returned:
	  //
	  //   ASSERT_OK(conversation->SendMessage(
	  //       JsonMessage{{"role", "user"},
	  //                   {"content", " This is the last message."}},
	  //       {.has_pending_message = false}));
	  //
	  // 3) Alternatively, append the last message as pending and then send an
	  //    empty message with has_pending_message set to false to only trigger
	  //    the decode stage:
	  //
	  //   ASSERT_OK(conversation->SendMessage(
	  //       JsonMessage{{"role", "user"},
	  //                   {"content", " This is the last message."}},
	  //       {.has_pending_message = true}));
	  //
	  //   ASSERT_OK(conversation->SendMessage(
	  //       JsonMessage{{"role", "user"}, {"content", ""}},
	  //       {.has_pending_message = false}));
	  bool has_pending_message{false};

	  // Constraint applied during constrained decoding. Empty by default.
	  std::optional<ConstraintArg> decoding_constraint{};

	  // Arguments forwarded to the model data processor. Users rarely need to
	  // set this. Empty by default.
	  std::optional<DataProcessorArguments> args{};

	  // Upper bound on the number of tokens generated during decode. Empty by
	  // default.
	  std::optional<int> max_output_tokens{};

	  // Task group id for asynchronous tasks. When set, the task controller is
	  // stored and can later be cancelled through
	  // `Conversation::CancelGroup(task_group_id)`. Empty by default.
	  std::optional<std::string> task_group_id{};

	  // Extra template context passed into PromptTemplateInput. It applies to a
	  // single message only and is merged with the extra context provided in the
	  // Preface, overwriting existing keys. Empty by default.
	  std::optional<nlohmann::ordered_json> extra_context{};
	};

	// A multi-turn centric stateful Conversation API for high-level user
	// interaction. Conversation maintains the history for users, so the users'
	// messages will be used as the LLM context through the conversation.
	//
	// Conversation handles the complex data processing logic for Session usage,
	// including:
	// - Prompt template rendering.
	// - Role-based messages handling.
	// - Multimodal input processing.
	// - History management.
	// - Model-specific data processing.
	//
	// Example usage:
	//
	// // Create an Engine instance.
	// ASSIGN_OR_RETURN(auto engine, Engine::Create(model_assets));
	//
	// // Create a ConversationConfig instance from the Engine.
	// ASSIGN_OR_RETURN(auto conversation_config,
	// ConversationConfig::CreateDefault(*engine));
	//
	// // Create a Conversation instance.
	// ASSIGN_OR_RETURN(auto conversation,
	// Conversation::Create(*engine, conversation_config));
	//
	// // Send a message to the LLM and return the complete message.
	// ASSIGN_OR_RETURN(const Message message,
	// conversation->SendMessage(JsonMessage{
	// {"role", "user"}, {"content", "Hello world!"}}));
	//
	// // Send a message to the LLM and process the asynchronous message results
	// // via the user_callback. The user_callback is a user-defined callback
	// // function that handles the message results.
	// EXPECT_OK(conversation->SendMessageAsync(
	//     JsonMessage{{"role", "user"}, {"content", "Hello world!"}},
	//     [](absl::StatusOr<Message> message) {
	//       // Handle the message results.
	//       if (message.ok()) {
	//         std::cout << "Message: " << std::endl;
	//       }
	//     }));
	//
	class Conversation {
	 public:
	  // Creates a Conversation instance from the Engine and ConversationConfig.
	  // Args:
	  // - `engine`: The Engine instance to be used for creating the Conversation.
	  // - `config`: The ConversationConfig instance to be used for creating the
	  //   Conversation.
	  static absl::StatusOr<std::unique_ptr<Conversation>> Create(
	      Engine& engine, const ConversationConfig& config);

	  // Sends a message to the LLM and returns the complete message.
	  // Args:
	  // - `message`: The message to be sent to the LLM. If `message` is an array,
	  //   each element will be treated as a separate message and be prefilled
	  //   before generating the response.
	  // - `optional_args`: The optional arguments for sending the message. See the
	  //   definition of `OptionalArgs` for more details.
	  // Returns:
	  // - The complete message from the LLM.
	  absl::StatusOr<Message> SendMessage(
	      const Message& message, OptionalArgs optional_args = OptionalArgs());

	  // Sends a message to the LLM and processes the asynchronous message results
	  // via the user_callback.
	  // Args:
	  // - `message`: The message to be sent to the LLM. If `message` is an array,
	  //   each element will be treated as a separate message and be prefilled
	  //   before generating the response.
	  // - `user_callback`: The callback to receive the message events. The
	  //   user_callback will be invoked in the following conditions:
	  //   - On every new message chunk.
	  //   - When the generation is complete, the user_callback will be invoked
	  //     with an empty message.
	  //   - When the generation is cancelled, the user_callback will be invoked
	  //     with absl::CancelledError.
	  //   - When an error occurs, the user_callback will be invoked with the error
	  //     status.
	  // - `optional_args`: The optional arguments for sending the message. See the
	  //   definition of `OptionalArgs` for more details.
	  // Returns:
	  // - absl::OkStatus if the message is sent and processed successfully,
	  //   otherwise the error status.
	  absl::Status SendMessageAsync(
	      const Message& message,
	      absl::AnyInvocable<void(absl::StatusOr<Message>)> user_callback,
	      OptionalArgs optional_args = OptionalArgs());

	  // Scores the target text after the prefill process is done. This function
	  // will run the decode process (with the existing context history) by feeding
	  // in the provided target text tokens and fetch the decode output logits that
	  // correspond to the target text tokens. This is useful for running certain
	  // scoring metrics, e.g. perplexity.
	  // Note that the function will NOT update the conversation history or the
	  // internal state of the Conversation. The existing context history will
	  // remain the same after the function call.
	  // Note also that the function will NOT apply any additional prompt template
	  // to the target text as the goal is to get the score of the raw target text.
	  // Args:
	  // - `target_text`: The target text to score.
	  // - `optional_args`: The optional arguments for the call. See the
	  //   definition of `OptionalArgs` for more details.
	  // Returns:
	  // - The score associated with each of the target texts. The scores are the
	  //   log likelihood of the target text given the existing context history.
	  absl::StatusOr<Responses> RunTextScoring(
	      const std::vector<absl::string_view>& target_text,
	      OptionalArgs optional_args = OptionalArgs());

	  // Similar to the above RunTextScoring function, but this is a non-blocking
	  // call and the function will return right away. The processing status will
	  // be signaled through the callback.
	  absl::Status RunTextScoringAsync(
	      const std::vector<absl::string_view>& target_text,
	      absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback,
	      OptionalArgs optional_args = OptionalArgs());

	  // Returns the history of the conversation.
	  // Note: the return value is a copy of the history, which may be expensive
	  // for large history. The copy is made while holding the history mutex.
	  std::vector<Message> GetHistory() const {
	    absl::MutexLock lock(&history_mutex_);  // NOLINT
	    return history_;
	  }

	  // Provides safe access to the conversation history without copying.
	  // The provided visitor function is executed while the history mutex is held.
	  // Args:
	  // - `visitor`: The visitor function takes a const reference to the history
	  //   vector.
	  //
	  // Example usage:
	  //
	  //   Message assistant_message;
	  //   conversation->AccessHistory(
	  //       [&assistant_message](const std::vector<Message>& history) {
	  //         // Copy the last message to assistant_message. So we don't need to
	  //         // copy the whole history, if we only need the last message.
	  //         assistant_message = history.back();
	  //       });
	  void AccessHistory(absl::AnyInvocable<void(const std::vector<Message>&) const>
	                         visitor) const {
	    absl::MutexLock lock(&history_mutex_);  // NOLINT
	    visitor(history_);
	  }

	  // Returns the configuration used for creating the Conversation.
	  const ConversationConfig& GetConfig() const { return config_; }

	  // Returns the benchmark info for the conversation. Under the hood, this
	  // method triggers the benchmark info collection from the Session.
	  // Returns:
	  // - The benchmark info for the conversation.
	  absl::StatusOr<BenchmarkInfo> GetBenchmarkInfo();

	  // Returns the mutable benchmark info for the conversation. Under the hood,
	  // this method triggers the mutable benchmark info collection from the
	  // Session.
	  // Returns:
	  // - The mutable benchmark info for the conversation.
	  absl::StatusOr<BenchmarkInfo*> GetMutableBenchmarkInfo();

	  // Cancels the ongoing inference process, for asynchronous inference.
	  // Note: the underlying Session is not rolled back, so the message
	  // from the user is actually sent to the LLM and processed for prefill.
	  void CancelProcess();

	  // Clones the conversation. The cloned conversation will be independent of the
	  // original conversation, including the history, state, etc.
	  //
	  // Note that the cloned conversation will not clone the group_id of the
	  // ongoing tasks.
	  absl::StatusOr<std::unique_ptr<Conversation>> Clone();

	  // Cancels all ongoing asynchronous tasks with the given task_group_id.
	  // Args:
	  // - `task_group_id`: The id of the task group to cancel.
	  // Note: after the cancellation, there is no guarantee that the internal state
	  // of the Conversation is intact and therefore it is recommended to not
	  // continue using the Conversation after cancellation.
	  void CancelGroup(absl::string_view task_group_id);

	 private:
	  // Constructs a Conversation from already-created components. `engine` is
	  // stored by reference; every by-value argument is a sink parameter and is
	  // moved into the corresponding member (no extra copies of `preface` or
	  // `config` are made).
	  explicit Conversation(
	      Engine& engine, std::unique_ptr<Engine::Session> session,
	      std::unique_ptr<ModelDataProcessor> model_data_processor, Preface preface,
	      PromptTemplate prompt_template, ConversationConfig config,
	      std::unique_ptr<ConstraintProvider> constraint_provider = nullptr)
	      : engine_(engine),
	        model_data_processor_(std::move(model_data_processor)),
	        preface_(std::move(preface)),
	        prompt_template_(std::move(prompt_template)),
	        config_(std::move(config)),
	        constraint_provider_(std::move(constraint_provider)),
	        session_(std::move(session)) {}

	  absl::StatusOr<std::string> GetSingleTurnText(
	      const Message& message, const OptionalArgs& optional_args);

	  absl::StatusOr<std::string> GetSingleTurnTextFromFullHistory(
	      const JsonMessage& json_message, const OptionalArgs& optional_args);

	  absl::StatusOr<std::string> GetSingleTurnTextFromSingleTurnTemplate(
	      const JsonMessage& json_message, const OptionalArgs& optional_args);

	  absl::StatusOr<DecodeConfig> CreateDecodeConfig(
	      std::optional<ConstraintArg> decoding_constraint = std::nullopt,
	      std::optional<int> max_output_tokens = std::nullopt);

	  // Adds a task controller to the task_controllers_ map if task_group_id is
	  // provided.
	  // Args:
	  // - `task_group_id`: The id of the task group to add the controller to.
	  // - `task_controller`: The task controller to add.
	  void AddTaskController(
	      const std::optional<std::string>& task_group_id,
	      std::unique_ptr<Engine::Session::TaskController> task_controller);

	  // Returns the prefill text for the given messages.
	  //
	  // The prefill text is obtained by taking the difference between the rendered
	  // string when the template context contains only the old message and the
	  // rendered string when the template context contains both the new and old
	  // messages.
	  //
	  // Args:
	  // - `old_messages`: The old messages that have already been prefilled.
	  // - `new_messages`: The new messages to be prefilled.
	  // - `optional_args`: The optional arguments for template rendering.
	  absl::StatusOr<std::string> GetPrefillTextForMessages(
	      absl::Span<const Message> old_messages,
	      absl::Span<const Message> new_messages,
	      const OptionalArgs& optional_args = OptionalArgs());

	  // Returns the input data vector for the given messages.
	  //
	  // Gets the prefill text for `new_messages` and converts it to an input data
	  // vector for `Session::RunPrefill`.
	  //
	  // Args:
	  // - `old_messages`: The old messages that have already been prefilled.
	  // - `new_messages`: The new messages to be prefilled.
	  // - `optional_args`: The optional arguments for template rendering.
	  absl::StatusOr<std::vector<InputData>> GetInputDataVectorForMessages(
	      absl::Span<const Message> old_messages,
	      absl::Span<const Message> new_messages,
	      const OptionalArgs& optional_args = OptionalArgs());

	  // Rewinds the session to the checkpoint after the most recent channel content
	  // and returns the input data vector for all messages from that point onward.
	  absl::StatusOr<std::vector<InputData>> RewindAndGetInputDataVector();

	  // Keep a reference to the creator engine to enable access to the shared
	  // resources that might be required for features like cloning.
	  Engine& engine_;
	  std::unique_ptr<ModelDataProcessor> model_data_processor_;
	  Preface preface_;
	  PromptTemplate prompt_template_;
	  // The constraint is currently created from the tools defined in the preface,
	  // if any.
	  std::unique_ptr<Constraint> constraint_;
	  const ConversationConfig config_;
	  std::unique_ptr<ConstraintProvider> constraint_provider_ = nullptr;
	  mutable absl::Mutex history_mutex_;
	  std::vector<Message> history_ ABSL_GUARDED_BY(history_mutex_);

	  // Whether the current conversation is in message appending state.
	  bool is_appending_message_ = false;

	  // Mutex for task_controllers_.
	  mutable absl::Mutex task_controllers_mutex_;
	  // Map of task group id to task controllers.
	  absl::flat_hash_map<
	      std::string,
	      std::vector<std::unique_ptr<Engine::Session::TaskController>>>
	      task_controllers_ ABSL_GUARDED_BY(task_controllers_mutex_);

	  // Declare the session after model_data_processor_ and other members it
	  // depends on so that the session is destroyed before them (members are
	  // destroyed in reverse declaration order). This is to avoid memory
	  // corruption and null-pointer dereference issues.
	  std::unique_ptr<Engine::Session> session_;

	  // Whether checkpointing and rewinding are supported by the session.
	  //
	  // Assumed to be true initially but on the first error from SaveCheckpoint,
	  // will be set to false. Rewinding is supported by SessionBasic but not by
	  // SessionAdvanced.
	  //
	  // TODO(b/494425377): Support rewinding in SessionAdvanced and remove
	  // session_checkpoint_supported_.
	  bool session_checkpoint_supported_ = true;

	  // The index of the message you have to rewind to in order to remove channel
	  // content from the KV cache. nullopt means no rewind is needed.
	  std::optional<int> checkpoint_message_index_ = std::nullopt;
	};
	} // namespace litert::lm

	#endif // THIRD_PARTY_ODML_LITERT_LM_RUNTIME_CONVERSATION_CONVERSATION_H_