Spaces:

FINAL-Bench
/

LiteRT-LM

Running

App Files Files Community

LiteRT-LM / runtime /conversation /conversation_test.cc

SeaWolf-AI

Upload full LiteRT-LM codebase

5f923cd verified 5 days ago

raw

history blame contribute delete

109 kB

	// Copyright 2025 The ODML Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "runtime/conversation/conversation.h"

	#include <filesystem> // NOLINT: Required for path manipulation.
	#include <fstream>
	#include <ios>
	#include <memory>
	#include <optional>
	#include <sstream>
	#include <string>
	#include <utility>
	#include <variant>
	#include <vector>

	#include <gmock/gmock.h>
	#include <gtest/gtest.h>
	#include "absl/container/flat_hash_map.h" // from @com_google_absl
	#include "absl/functional/any_invocable.h" // from @com_google_absl
	#include "absl/status/status.h" // from @com_google_absl
	#include "absl/status/statusor.h" // from @com_google_absl
	#include "absl/strings/match.h" // from @com_google_absl
	#include "absl/strings/str_cat.h" // from @com_google_absl
	#include "absl/strings/string_view.h" // from @com_google_absl
	#include "absl/synchronization/notification.h" // from @com_google_absl
	#include "absl/time/clock.h" // from @com_google_absl
	#include "absl/time/time.h" // from @com_google_absl
	#include "nlohmann/json.hpp" // from @nlohmann_json
	#include "runtime/components/constrained_decoding/bitmap.h"
	#include "runtime/components/constrained_decoding/constraint.h"
	#include "runtime/components/constrained_decoding/external_constraint_config.h"
	#include "runtime/components/prompt_template.h"
	#include "runtime/components/sentencepiece_tokenizer.h"
	#include "runtime/components/tokenizer.h"
	#include "runtime/conversation/io_types.h"
	#include "runtime/engine/engine.h"
	#include "runtime/engine/engine_factory.h"
	#include "runtime/engine/engine_settings.h"
	#include "runtime/engine/io_types.h"
	#include "runtime/executor/executor_settings_base.h"
	#include "runtime/util/test_utils.h" // IWYU pragma: keep

	namespace litert::lm {
	namespace {

	using ::testing::AllOf;
	using ::testing::ElementsAre;
	using ::testing::HasSubstr;
	using ::testing::Not;
	using ::testing::Return;
	using ::testing::VariantWith;

	// Path, relative to the directory returned by ::testing::SrcDir(), of the
	// LiteRT-LM test model that the tests load through ModelAssets::Create.
	// Declared constexpr, like the other path constants in this file, so that it
	// is an immutable compile-time constant rather than a mutable global.
	constexpr absl::string_view kTestLlmPath =
	    "litert_lm/runtime/testdata/test_lm.litertlm";

	// Path, relative to the directory returned by ::testing::SrcDir(), of the
	// SentencePiece tokenizer model loaded by the ConversationTest fixture.
	constexpr char kTestTokenizerPath[] =
	"litert_lm/runtime/components/testdata/gemma3_sentencepiece.model";

	// Path of the "google-gemma-3n-e2b-it-tools-multi-prefill" Jinja prompt
	// template test fixture.
	constexpr char kGemma3ToolsMultiPrefillTemplatePath[] =
	"litert_lm/runtime/components/testdata/"
	"google-gemma-3n-e2b-it-tools-multi-prefill.jinja";

	// Path of the "google-gemma-3-1b-it" Jinja prompt template test fixture.
	constexpr char kGemma3TemplatePath[] =
	"litert_lm/runtime/components/testdata/google-gemma-3-1b-it.jinja";

	// Minimal Jinja prompt template used by the mock-based tests. For every
	// message it emits "<start_of_turn>" + role + "\n", followed by the message
	// content (the string itself, or the text of the first content item when the
	// content is not a string) and "<end_of_turn>\n".
	constexpr absl::string_view kTestJinjaPromptTemplate = R"jinja(
	{%- for message in messages -%}
	{{- '<start_of_turn>' + message.role + '\n' -}}
	{%- if message.content is string -%}
	{{- message.content + '<end_of_turn>\n' -}}
	{%- else -%}
	{{- message.content[0].text + '<end_of_turn>\n' -}}
	{%- endif -%}
	{%- endfor -%}
	)jinja";

	// Returns `file_path` prefixed with the directory reported by
	// ::testing::SrcDir(), joined with a single "/" separator.
	std::string GetTestdataPath(absl::string_view file_path) {
	  const std::string src_dir = ::testing::SrcDir();
	  return absl::StrCat(src_dir, "/", file_path);
	}

	// Reads the entire file at `path` in binary mode and returns its contents.
	//
	// If the file cannot be opened, a non-fatal test failure is recorded and an
	// empty string is returned, so that a missing fixture is reported instead of
	// being silently treated as an empty file.
	std::string ReadFile(absl::string_view path) {
	  std::ifstream ifstr(std::string(path), std::ios::binary);
	  if (!ifstr.is_open()) {
	    ADD_FAILURE() << "Failed to open file: " << path;
	    return std::string();
	  }
	  std::stringstream contents;
	  contents << ifstr.rdbuf();
	  return contents.str();
	}

	// gMock implementation of `Engine::Session`. Every method declared below is
	// mocked so that tests can set expectations on the calls a Conversation makes
	// to its session and control the values those calls return.
	class MockSession : public Engine::Session {
	public:
	// Content generation (blocking and streaming overloads).
	MOCK_METHOD(absl::StatusOr<Responses>, GenerateContent,
	(const std::vector<InputData>& contents), (override));
	MOCK_METHOD(
	absl::Status, GenerateContentStream,
	(const std::vector<InputData>& contents,
	absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback),
	(override));
	MOCK_METHOD(
	absl::Status, GenerateContentStream,
	(const std::vector<InputData>& contents,
	absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	const DecodeConfig& decode_config),
	(override));
	// Text scoring (blocking and asynchronous).
	MOCK_METHOD(absl::StatusOr<Responses>, RunTextScoring,
	(const std::vector<absl::string_view>& target_text,
	bool store_token_lengths),
	(override));
	MOCK_METHOD(absl::StatusOr<std::unique_ptr<Engine::Session::TaskController>>,
	RunTextScoringAsync,
	(const std::vector<absl::string_view>& target_text,
	absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback,
	bool store_token_lengths),
	(override));

	// Prefill and decode (blocking and asynchronous overloads).
	MOCK_METHOD(absl::Status, RunPrefill,
	(const std::vector<InputData>& contents), (override));
	MOCK_METHOD(
	absl::StatusOr<std::unique_ptr<Engine::Session::TaskController>>,
	RunPrefillAsync,
	(const std::vector<InputData>& contents,
	absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback),
	(override));
	MOCK_METHOD(absl::StatusOr<Responses>, RunDecode, (), (override));
	MOCK_METHOD(absl::StatusOr<Responses>, RunDecode,
	(const DecodeConfig& decode_config), (override));
	MOCK_METHOD(
	absl::StatusOr<std::unique_ptr<Engine::Session::TaskController>>,
	RunDecodeAsync,
	(absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback),
	(override));
	MOCK_METHOD(
	absl::StatusOr<std::unique_ptr<Engine::Session::TaskController>>,
	RunDecodeAsync,
	(absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	const DecodeConfig& decode_config),
	(override));
	// Session cloning.
	MOCK_METHOD(absl::StatusOr<std::unique_ptr<Session>>, Clone, (), (override));
	MOCK_METHOD(absl::StatusOr<std::unique_ptr<Session>>, CloneAsync,
	(absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback),
	(override));
	// Benchmark info, cancellation and waiting.
	MOCK_METHOD(absl::StatusOr<BenchmarkInfo>, GetBenchmarkInfo, (), (override));
	MOCK_METHOD(absl::StatusOr<BenchmarkInfo*>, GetMutableBenchmarkInfo, (),
	(override));
	MOCK_METHOD(void, CancelProcess, (), (override));
	MOCK_METHOD(absl::Status, WaitUntilDone, (), (override));
	// Checkpointing; the channel-content filtering test sets expectations on
	// these two methods.
	MOCK_METHOD(absl::Status, SaveCheckpoint, (absl::string_view label),
	(override));
	MOCK_METHOD(absl::Status, RewindToCheckpoint, (absl::string_view label),
	(override));
	// Session configuration accessor; the ConversationTest fixture stubs it to
	// return its own `session_config_`.
	MOCK_METHOD(const SessionConfig&, GetSessionConfig, (), (const, override));
	};

	// gMock implementation of `Engine`. The ConversationTest fixture stubs
	// GetEngineSettings, GetTokenizer and CreateSession so that a Conversation
	// can be built on top of a MockSession instead of a real model.
	class MockEngine : public Engine {
	public:
	MOCK_METHOD(const EngineSettings&, GetEngineSettings, (), (const, override));
	MOCK_METHOD(const Tokenizer&, GetTokenizer, (), (const, override));
	MOCK_METHOD(absl::StatusOr<AudioExecutorProperties>,
	GetAudioExecutorProperties, (), (const, override));
	MOCK_METHOD(absl::StatusOr<VisionExecutorProperties>,
	GetVisionExecutorProperties, (), (const, override));
	MOCK_METHOD(absl::StatusOr<std::unique_ptr<Session>>, CreateSession,
	(const SessionConfig& session_config), (override));
	MOCK_METHOD(absl::Status, WaitUntilDone, (absl::Duration timeout),
	(override));
	};

	// gMock implementation of `Engine::Session::TaskController`, the handle type
	// returned by the asynchronous session methods mocked in MockSession.
	class MockTaskController : public Engine::Session::TaskController {
	public:
	MockTaskController() = default;
	~MockTaskController() override = default;
	// Mocked cancellation entry point.
	MOCK_METHOD(absl::Status, Cancel, (), (override));
	};

	// Returns a streaming callback that validates partial responses against
	// `expected_message`.
	//
	// Each non-null JSON message must carry a text chunk that is a prefix of the
	// text still expected; the matched prefix is then removed from
	// `expected_message` so the next chunk is compared against the remainder. A
	// null JSON message marks the end of the stream: the remaining expected text
	// must be empty and `done` is notified. A non-OK status fails the test.
	//
	// Both `expected_message` and `done` are captured by reference and must
	// outlive the returned callback. `expected_message` is modified in place.
	absl::AnyInvocable<void(absl::StatusOr<Message>)> CreateTestMessageCallback(
	    Message& expected_message, absl::Notification& done) {
	  return [&expected_message, &done](absl::StatusOr<Message> message) mutable {
	    // If the message is not ok, fail the test.
	    if (!message.ok()) {
	      FAIL() << "Message user_callback failed: " << message.status();
	      return;
	    }
	    // std::get_if returns nullptr when the message does not hold a
	    // JsonMessage; such messages are ignored. Checking here avoids
	    // dereferencing a null pointer in the end-of-stream test below.
	    auto* json_message = std::get_if<JsonMessage>(&message.value());
	    if (json_message == nullptr) {
	      return;
	    }
	    JsonMessage& expected_json_message =
	        std::get<JsonMessage>(expected_message);
	    // If the message is null, the last callback is received.
	    if (json_message->is_null()) {
	      ASSERT_TRUE(expected_json_message["content"][0]["text"].is_string());
	      std::string expected_string = expected_json_message["content"][0]["text"];
	      // The expected string should be empty after the last callback.
	      EXPECT_TRUE(expected_string.empty());
	      done.Notify();
	      return;
	    }
	    // Otherwise, this is a partial response. Compare the message text content
	    // by prefix, and update the expected message to the remaining text for the
	    // next user_callback.
	    ASSERT_TRUE(expected_json_message["content"][0]["text"].is_string());
	    ASSERT_TRUE((*json_message)["content"][0]["text"].is_string());
	    std::string expected_string = expected_json_message["content"][0]["text"];
	    std::string actual_string = (*json_message)["content"][0]["text"];
	    EXPECT_TRUE(absl::StartsWith(expected_string, actual_string))
	        << "Expected: " << expected_string << "\nActual: " << actual_string;
	    expected_json_message["content"][0]["text"] =
	        expected_string.substr(actual_string.size());
	  };
	}

	// Returns a streaming callback that expects exactly the messages in
	// `expected_messages`, in order.
	//
	// Every non-null JSON message is compared against the next expected message.
	// A null JSON message marks the end of the stream: the number of messages
	// seen so far must equal `expected_messages.size()` and `done` is notified.
	// A non-OK status or a non-JSON message fails the test.
	//
	// `expected_messages` and `done` are captured by reference and must outlive
	// the returned callback.
	absl::AnyInvocable<void(absl::StatusOr<Message>)>
	CreateTestMultiMessageCallback(const std::vector<Message>& expected_messages,
	                               absl::Notification& done) {
	  // The index uses the vector's own size type so that the comparisons with
	  // `expected_messages.size()` below do not mix signed and unsigned integers.
	  return [&expected_messages, &done,
	          current_index = std::vector<Message>::size_type{0}](
	             absl::StatusOr<Message> message) mutable {
	    ASSERT_OK(message);
	    ASSERT_TRUE(std::holds_alternative<JsonMessage>(message.value()));
	    // Bind by reference: the payload is only inspected, never modified.
	    const auto& json_message = std::get<JsonMessage>(message.value());

	    // If the message is null, the message stream is complete.
	    if (json_message.is_null()) {
	      EXPECT_EQ(current_index, expected_messages.size())
	          << "Expected " << expected_messages.size()
	          << " messages but only got " << current_index;
	      done.Notify();
	      return;
	    }

	    ASSERT_LT(current_index, expected_messages.size())
	        << "Received more messages than expected. Expected size: "
	        << expected_messages.size();
	    EXPECT_THAT(*message, testing::Eq(expected_messages[current_index]));
	    ++current_index;
	  };
	}

	// Checks that ConversationConfig::CreateDefault succeeds for an engine built
	// from the test model and that a Conversation can be created from the result.
	TEST(ConversationConfigTest, CreateDefault) {
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  ASSERT_OK_AND_ASSIGN(auto default_config,
	                       ConversationConfig::CreateDefault(*engine));
	  EXPECT_OK(Conversation::Create(*engine, default_config));
	}

	// Checks that a prompt template set through SetOverwritePromptTemplate is the
	// one reported by the built config, and that the session config's user
	// prefix is empty.
	TEST(ConversationConfigTest, CreateDefaultWithOverwritePromptTemplate) {
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetOverwritePromptTemplate(PromptTemplate("Hello world!"))
	          .Build(*engine));

	  EXPECT_EQ(built_config.GetPromptTemplate().GetTemplateSource(),
	            "Hello world!");
	  EXPECT_TRUE(built_config.GetSessionConfig()
	                  .GetPromptTemplates()
	                  .user()
	                  .prefix()
	                  .empty());
	}

	// Builds a ConversationConfig with an explicit session config and a JSON
	// preface, then checks that both are reflected by the built config and that
	// a Conversation can be created from it.
	TEST(ConversationConfigTest, CreateWithBuilder) {
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  // Select the gemma3n model type on an otherwise default session config.
	  auto gemma3n_session_config = SessionConfig::CreateDefault();
	  gemma3n_session_config.GetMutableLlmModelType().mutable_gemma3n();

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(gemma3n_session_config)
	          .SetPreface(JsonPreface{
	              .messages = {{{"role", "system"},
	                            {"content", "You are a helpful assistant."}}}})
	          .Build(*engine));

	  // The preface must round-trip unchanged.
	  EXPECT_TRUE(std::holds_alternative<JsonPreface>(built_config.GetPreface()));
	  EXPECT_EQ(
	      std::get<JsonPreface>(built_config.GetPreface()).messages,
	      nlohmann::ordered_json(
	          {{{"role", "system"}, {"content", "You are a helpful assistant."}}}));

	  // The model type chosen above must be kept.
	  EXPECT_EQ(built_config.GetSessionConfig().GetLlmModelType().model_type_case(),
	            proto::LlmModelType::kGemma3N);
	  EXPECT_TRUE(built_config.GetSessionConfig()
	                  .GetPromptTemplates()
	                  .user()
	                  .prefix()
	                  .empty());
	  EXPECT_OK(Conversation::Create(*engine, built_config));
	}

	// Checks that SetFilterChannelContentFromKvCache(true) on the builder is
	// reported by filter_channel_content_from_kv_cache() on the built config.
	TEST(ConversationConfigTest, FilterChannelContentFromKvCache) {
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  ASSERT_OK_AND_ASSIGN(auto built_config,
	                       ConversationConfig::Builder()
	                           .SetFilterChannelContentFromKvCache(true)
	                           .Build(*engine));
	  EXPECT_TRUE(built_config.filter_channel_content_from_kv_cache());
	}

	// Checks that the template passed to SetOverwritePromptTemplate is the
	// template source reported by the built config.
	TEST(ConversationConfigTest, OverwritePromptTemplate) {
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetOverwritePromptTemplate(PromptTemplate("overwrite template"))
	          .Build(*engine));

	  EXPECT_EQ(built_config.GetPromptTemplate().GetTemplateSource(),
	            "overwrite template");
	}

	// Parameter set for the value-parameterized ConversationTest fixture.
	struct ConversationTestParams {
	// Value that tests pass to
	// ConversationConfig::Builder::SetEnableConstrainedDecoding.
	bool enable_constrained_decoding;
	// Value that tests pass to
	// ConversationConfig::Builder::SetPrefillPrefaceOnInit.
	bool prefill_preface_on_init;
	};

	// Value-parameterized fixture shared by the Conversation tests.
	//
	// SetUp loads the test tokenizer, model assets and default CPU engine
	// settings, and prepares a session config (start token 0, stop token {1},
	// gemma3 model type). The helper methods build a MockSession / MockEngine
	// pair that serves those objects to the code under test.
	class ConversationTest : public testing::TestWithParam<ConversationTestParams> {
	 public:
	  // Returns every combination of the two boolean parameters, in the order
	  // (true, true), (true, false), (false, true), (false, false).
	  static std::vector<ConversationTestParams> GetTestParams() {
	    return {
	        ConversationTestParams{true, true},
	        ConversationTestParams{true, false},
	        ConversationTestParams{false, true},
	        ConversationTestParams{false, false},
	    };
	  }

	 protected:
	  void SetUp() override {
	    const std::filesystem::path tokenizer_path =
	        std::filesystem::path(::testing::SrcDir()) / kTestTokenizerPath;
	    ASSERT_OK_AND_ASSIGN(
	        tokenizer_,
	        SentencePieceTokenizer::CreateFromFile(tokenizer_path.string()));

	    model_assets_ = ModelAssets::Create(GetTestdataPath(kTestLlmPath));
	    ASSERT_OK(model_assets_);

	    engine_settings_ =
	        EngineSettings::CreateDefault(*model_assets_, Backend::CPU);
	    ASSERT_OK(engine_settings_);

	    session_config_ = SessionConfig::CreateDefault();
	    session_config_.SetStartTokenId(0);
	    session_config_.GetMutableStopTokenIds().push_back({1});
	    *session_config_.GetMutableLlmModelType().mutable_gemma3() = {};
	  }

	  // Creates a MockSession whose GetSessionConfig() returns the fixture's
	  // `session_config_` by reference.
	  std::unique_ptr<MockSession> CreateMockSession() {
	    auto session = std::make_unique<MockSession>();
	    EXPECT_CALL(*session, GetSessionConfig())
	        .WillRepeatedly(testing::ReturnRef(session_config_));
	    return session;
	  }

	  // Creates a MockEngine that serves the fixture's engine settings and
	  // tokenizer, and hands out `mock_session` from a single CreateSession call.
	  std::unique_ptr<MockEngine> CreateMockEngine(
	      std::unique_ptr<MockSession> mock_session) {
	    auto engine = std::make_unique<MockEngine>();
	    EXPECT_CALL(*engine, GetEngineSettings())
	        .WillRepeatedly(testing::ReturnRef(*engine_settings_));
	    EXPECT_CALL(*engine, CreateSession(testing::_))
	        .WillOnce(testing::Return(std::move(mock_session)));
	    EXPECT_CALL(*engine, GetTokenizer())
	        .WillRepeatedly(testing::ReturnRef(*tokenizer_));
	    return engine;
	  }

	  std::unique_ptr<Tokenizer> tokenizer_;
	  absl::StatusOr<ModelAssets> model_assets_;
	  absl::StatusOr<EngineSettings> engine_settings_;
	  SessionConfig session_config_ = SessionConfig::CreateDefault();
	  // Copies of the current test parameters, read once at construction.
	  bool enable_constrained_decoding_ = GetParam().enable_constrained_decoding;
	  bool prefill_preface_on_init_ = GetParam().prefill_preface_on_init;
	};

	// Sends one user message through a Conversation built on the test model
	// engine and checks the returned assistant message and the resulting history.
	TEST_P(ConversationTest, SendMessage) {
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, built_config));
	  EXPECT_THAT(conversation->GetHistory(), testing::IsEmpty());

	  JsonMessage user_message = {{"role", "user"}, {"content", "Hello world!"}};
	  ASSERT_OK_AND_ASSIGN(const Message reply,
	                       conversation->SendMessage(user_message));

	  // The expected message is just some gibberish text, because the test LLM has
	  // random weights.
	  JsonMessage expected_message = {
	      {"role", "assistant"},
	      {"content",
	       {{{"type", "text"}, {"text", "TarefaByte دارایेत्र investigaciónప్రదేశ"}}}}};
	  EXPECT_EQ(std::get<JsonMessage>(reply), expected_message);
	  EXPECT_THAT(conversation->GetHistory(),
	              ElementsAre(user_message, expected_message));
	}

	// Sends one user message through a Conversation that uses the Gemma 3 Jinja
	// template read from the test data and checks that SendMessage succeeds.
	TEST_P(ConversationTest, SendMessageGemma3Template) {
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(20);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  const std::string template_path = GetTestdataPath(kGemma3TemplatePath);
	  std::string template_source = ReadFile(template_path);

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .SetOverwritePromptTemplate(PromptTemplate(template_source))
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, built_config));
	  EXPECT_THAT(conversation->GetHistory(), testing::IsEmpty());

	  JsonMessage user_message = {{"role", "user"}, {"content", "Hello world!"}};
	  EXPECT_OK(conversation->SendMessage(user_message));
	}

	// Sends one user message through a Conversation backed by mocks and checks
	// the text passed to RunPrefill, the returned assistant message and the
	// history.
	//
	// NOTE(review): this parameterized test never passes
	// enable_constrained_decoding_ / prefill_preface_on_init_ to the builder, so
	// every instantiation appears to run the same scenario - confirm that this
	// is intended.
	TEST_P(ConversationTest, SendSingleMessage) {
	  // Keep a raw pointer to the session before its ownership is handed to
	  // CreateMockEngine, so expectations can still be set on it.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, built_config));

	  // The single message to send.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};

	  absl::string_view expected_prompt =
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";
	  EXPECT_CALL(*session_mock,
	              RunPrefill(ElementsAre(VariantWith<InputText>(testing::Property(
	                  &InputText::GetRawTextString, expected_prompt)))))
	      .WillOnce(Return(absl::OkStatus()));
	  EXPECT_CALL(*session_mock, RunDecode(testing::_))
	      .WillOnce(Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK_AND_ASSIGN(const Message reply,
	                       conversation->SendMessage(user_message));

	  JsonMessage assistant_message = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})");
	  EXPECT_EQ(std::get<JsonMessage>(reply), assistant_message);
	  EXPECT_THAT(conversation->GetHistory(),
	              ElementsAre(user_message, assistant_message));
	}

	// Sends one user message together with per-call extra context
	// ("enable_thinking" = "true") and checks that the template variable is
	// honoured in the text passed to RunPrefill.
	//
	// NOTE(review): this parameterized test never passes
	// enable_constrained_decoding_ / prefill_preface_on_init_ to the builder, so
	// every instantiation appears to run the same scenario - confirm that this
	// is intended.
	TEST_P(ConversationTest, SendSingleMessageWithExtraContext) {
	  // Keep a raw pointer to the session before its ownership is handed to
	  // CreateMockEngine, so expectations can still be set on it.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  // Template that emits a different system turn depending on
	  // `enable_thinking`, followed by the regular message turns.
	  absl::string_view thinking_template = R"jinja(
	{%- if enable_thinking -%}
	<start_of_turn>system
	Thinking enabled.<end_of_turn>
	{% else %}
	<start_of_turn>system
	Thinking disabled.<end_of_turn>
	{%- endif -%}
	{%- for message in messages -%}
	{{- '<start_of_turn>' + message.role + '\n' -}}
	{%- if message.content is string -%}
	{{- message.content + '<end_of_turn>\n' -}}
	{%- else -%}
	{{- message.content[0].text + '<end_of_turn>\n' -}}
	{%- endif -%}
	{%- endfor -%}
	)jinja";

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(thinking_template))
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, built_config));

	  // The single message to send, plus the extra context for this call.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};
	  OptionalArgs call_args;
	  call_args.extra_context = absl::flat_hash_map<std::string, std::string>{
	      {"enable_thinking", "true"}};

	  absl::string_view expected_prompt =
	      "<start_of_turn>system\nThinking enabled.<end_of_turn>\n"
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";

	  EXPECT_CALL(*session_mock,
	              RunPrefill(ElementsAre(VariantWith<InputText>(testing::Property(
	                  &InputText::GetRawTextString, expected_prompt)))))
	      .WillOnce(Return(absl::OkStatus()));
	  EXPECT_CALL(*session_mock, RunDecode(testing::_))
	      .WillOnce(Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK_AND_ASSIGN(
	      const Message reply,
	      conversation->SendMessage(user_message, std::move(call_args)));

	  JsonMessage assistant_message = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})");
	  EXPECT_EQ(std::get<JsonMessage>(reply), assistant_message);
	  EXPECT_THAT(conversation->GetHistory(),
	              ElementsAre(user_message, assistant_message));
	}

	// Checks how per-call extra context combines with the extra context set on
	// the preface: a key present in both takes the per-call value, a key only in
	// the preface is kept, and a key only in the call is added.
	//
	// NOTE(review): this parameterized test never passes
	// enable_constrained_decoding_ / prefill_preface_on_init_ to the builder, so
	// every instantiation appears to run the same scenario - confirm that this
	// is intended.
	TEST_P(ConversationTest, SendSingleMessageWithExtraContextOverwritingPreface) {
	  // Keep a raw pointer to the session before its ownership is handed to
	  // CreateMockEngine, so expectations can still be set on it.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  // Template that prints key1..key3 when defined, then the message turns.
	  absl::string_view keys_template = R"jinja(
	{%- if key1 -%}
	Key1: {{ key1 + "\n"}}
	{%- endif -%}
	{%- if key2 -%}
	Key2: {{ key2 + "\n"}}
	{%- endif -%}
	{%- if key3 -%}
	Key3: {{ key3 + "\n"}}
	{%- endif -%}
	{%- for message in messages -%}
	{{- '<start_of_turn>' + message.role + '\n' -}}
	{%- if message.content is string -%}
	{{- message.content + '<end_of_turn>\n' -}}
	{%- else -%}
	{{- message.content[0].text + '<end_of_turn>\n' -}}
	{%- endif -%}
	{%- endfor -%}
	)jinja";

	  // This extra context will be set at the Conversation level.
	  JsonPreface conversation_preface;
	  conversation_preface.extra_context = {{"key1", "val1"}, {"key2", "val2"}};

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetPreface(conversation_preface)
	          .SetOverwritePromptTemplate(PromptTemplate(keys_template))
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, built_config));

	  // The single message carries extra context that overwrites key1 and adds
	  // key3.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};
	  OptionalArgs call_args;
	  call_args.extra_context =
	      nlohmann::ordered_json{{"key1", "val1_new"}, {"key3", "val3"}};

	  // key1 should be overwritten to val1_new.
	  // key2 should remain val2.
	  // key3 should be added as val3.
	  absl::string_view expected_prompt =
	      "Key1: val1_new\n"
	      "Key2: val2\n"
	      "Key3: val3\n"
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";

	  EXPECT_CALL(*session_mock,
	              RunPrefill(ElementsAre(VariantWith<InputText>(testing::Property(
	                  &InputText::GetRawTextString, expected_prompt)))))
	      .WillOnce(Return(absl::OkStatus()));
	  EXPECT_CALL(*session_mock, RunDecode(testing::_))
	      .WillOnce(Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK_AND_ASSIGN(
	      const Message reply,
	      conversation->SendMessage(user_message, std::move(call_args)));

	  JsonMessage assistant_message = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})");
	  EXPECT_EQ(std::get<JsonMessage>(reply), assistant_message);
	}

	// Sends a JSON array of two user messages in one SendMessage call and checks
	// that both turns are passed to RunPrefill as one text input and that the
	// history holds the two user messages followed by the assistant reply.
	TEST_P(ConversationTest, SendMultipleMessages) {
	  // Keep a raw pointer to the session before its ownership is handed to
	  // CreateMockEngine, so expectations can still be set on it.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, built_config));

	  // Two consecutive user messages, sent together.
	  JsonMessage user_messages = nlohmann::ordered_json::parse(R"json(
	[
	{
	"role": "user",
	"content": "Hello world!"
	},
	{
	"role": "user",
	"content": "How are you?"
	}
	]
	)json");

	  absl::string_view expected_prompt =
	      "<start_of_turn>user\n"
	      "Hello world!<end_of_turn>\n"
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";
	  EXPECT_CALL(*session_mock,
	              RunPrefill(ElementsAre(VariantWith<InputText>(testing::Property(
	                  &InputText::GetRawTextString, expected_prompt)))))
	      .WillOnce(Return(absl::OkStatus()));
	  EXPECT_CALL(*session_mock, RunDecode(testing::_))
	      .WillOnce(Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK_AND_ASSIGN(const Message reply,
	                       conversation->SendMessage(user_messages));

	  JsonMessage assistant_message = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})");
	  EXPECT_EQ(std::get<JsonMessage>(reply), assistant_message);
	  EXPECT_THAT(conversation->GetHistory(),
	              ElementsAre(user_messages[0], user_messages[1],
	                          assistant_message));
	}

	// Sends one user message to a Conversation configured with a "thought"
	// channel and checks that the text between the channel delimiters in the
	// decoded response is reported under "channels" while the remaining text
	// becomes the message content.
	//
	// The channel delimiters are spelled "<|channel>" / "<channel|>". The
	// previous spelling used "\|", which is not a valid C++ escape sequence.
	TEST_P(ConversationTest, SendSingleMessageWithChannel) {
	  // Set up mock Session. The raw pointer is taken before ownership moves to
	  // CreateMockEngine so that expectations can still be set on the session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Create Conversation.
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .SetChannels({litert::lm::Channel{
	              .channel_name = "thought",
	              .start = "<|channel>thought\n",
	              .end = "<channel|>",
	          }})
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // Send a single message.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};

	  absl::string_view expected_input_text =
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";

	  EXPECT_CALL(*mock_session_ptr,
	              RunPrefill(testing::ElementsAre(
	                  testing::VariantWith<InputText>(testing::Property(
	                      &InputText::GetRawTextString, expected_input_text)))))
	      .WillOnce(testing::Return(absl::OkStatus()));
	  // The decoded text wraps "hmm" in the channel delimiters.
	  EXPECT_CALL(*mock_session_ptr, RunDecode(testing::_))
	      .WillOnce(testing::Return(
	          Responses(TaskState::kProcessing,
	                    {"<|channel>thought\nhmm<channel|>I am good."})));

	  // Send the message.
	  ASSERT_OK_AND_ASSIGN(const Message response,
	                       conversation->SendMessage(user_message));

	  JsonMessage assistant_message = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	],
	"channels": {
	"thought": "hmm"
	}
	})");
	  EXPECT_THAT(std::get<JsonMessage>(response), testing::Eq(assistant_message));
	  EXPECT_THAT(conversation->GetHistory(),
	              testing::ElementsAre(user_message, assistant_message));
	}

	// Same scenario as the single-message channel test, but with
	// "<think>\n" / "\n</think>" as the delimiters of the "thought" channel.
	TEST_P(ConversationTest, SendSingleMessageWithChannelQwenThink) {
	  // Keep a raw pointer to the session before its ownership is handed to
	  // CreateMockEngine, so expectations can still be set on it.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  ASSERT_OK_AND_ASSIGN(
	      auto built_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .SetChannels({litert::lm::Channel{
	              .channel_name = "thought",
	              .start = "<think>\n",
	              .end = "\n</think>",
	          }})
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, built_config));

	  // The single message to send.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};

	  absl::string_view expected_prompt =
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";

	  EXPECT_CALL(*session_mock,
	              RunPrefill(ElementsAre(VariantWith<InputText>(testing::Property(
	                  &InputText::GetRawTextString, expected_prompt)))))
	      .WillOnce(Return(absl::OkStatus()));
	  // The decoded text wraps "hmm" in the channel delimiters.
	  EXPECT_CALL(*session_mock, RunDecode(testing::_))
	      .WillOnce(Return(Responses(TaskState::kProcessing,
	                                 {"<think>\nhmm\n</think>I am good."})));

	  ASSERT_OK_AND_ASSIGN(const Message reply,
	                       conversation->SendMessage(user_message));

	  JsonMessage assistant_message = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	],
	"channels": {
	"thought": "hmm"
	}
	})");
	  EXPECT_THAT(std::get<JsonMessage>(reply), testing::Eq(assistant_message));
	  EXPECT_THAT(conversation->GetHistory(),
	              ElementsAre(user_message, assistant_message));
	}

	// Exercises SetFilterChannelContentFromKvCache(true) over two turns. The
	// expectations describe the intended sequence: prefill + checkpoint + decode
	// for the first message, then a rewind to that checkpoint and a prefill of
	// the previous assistant reply (without its channel content) together with
	// the second user message, followed by a new checkpoint and decode.
	//
	// The channel delimiters are spelled "<|channel>" / "<channel|>". The
	// previous spelling used "\|", which is not a valid C++ escape sequence.
	// `testing::ResultOf` is qualified explicitly because the file has no
	// using-declaration for it.
	TEST_P(ConversationTest, SendMessageWithChannelContentFiltering) {
	  // The raw pointer is taken before ownership moves to CreateMockEngine so
	  // that expectations can still be set on the session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Helper to get the raw text string from `InputText`. Returns an empty
	  // string when the text cannot be obtained.
	  auto get_text = [](const InputText& it) -> std::string {
	    auto status_or_view = it.GetRawTextString();
	    if (!status_or_view.ok()) return "";
	    return std::string(*status_or_view);
	  };

	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .SetFilterChannelContentFromKvCache(true)
	          .SetChannels({litert::lm::Channel{
	              .channel_name = "thought",
	              .start = "<|channel>thought\n",
	              .end = "<channel|>",
	          }})
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // Expect prefill of first user message.
	  EXPECT_CALL(*mock_session_ptr, RunPrefill(testing::_))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect checkpoint to be saved after the first user message is prefilled.
	  EXPECT_CALL(*mock_session_ptr, SaveCheckpoint("channel_content_checkpoint"))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect decode after first user message. Return response with channel
	  // content.
	  EXPECT_CALL(*mock_session_ptr, RunDecode(testing::_))
	      .WillOnce(
	          Return(Responses(TaskState::kProcessing,
	                           {"<|channel>thought\nhmm<channel|>I am good."})));

	  // Send the first user message.
	  JsonMessage user_message_1 = {{"role", "user"}, {"content", "How are you?"}};
	  ASSERT_OK(conversation->SendMessage(user_message_1));

	  // Expect rewind to checkpoint after second user message is sent.
	  EXPECT_CALL(*mock_session_ptr,
	              RewindToCheckpoint("channel_content_checkpoint"))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect previous assistant message and second user message to be prefilled
	  // when the second user message is sent. The assistant message should not
	  // have channel content.
	  auto assistant_message_matcher =
	      AllOf(HasSubstr("I am good."), Not(HasSubstr("hmm")));
	  EXPECT_CALL(
	      *mock_session_ptr,
	      RunPrefill(ElementsAre(
	          VariantWith<InputText>(
	              testing::ResultOf(get_text, assistant_message_matcher)),
	          VariantWith<InputText>(
	              testing::ResultOf(get_text, HasSubstr("That's great."))))))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect a new checkpoint to be saved after the second user message is
	  // prefilled.
	  EXPECT_CALL(*mock_session_ptr, SaveCheckpoint("channel_content_checkpoint"))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect decode after second user message.
	  EXPECT_CALL(*mock_session_ptr, RunDecode(testing::_))
	      .WillOnce(Return(Responses(TaskState::kProcessing, {"Thank you."})));

	  // Send the second user message.
	  JsonMessage user_message_2 = {{"role", "user"}, {"content", "That's great."}};
	  ASSERT_OK(conversation->SendMessage(user_message_2));
	}

	// Sends one user message to seed the history, then a JSON array holding two
	// user messages, and checks both the prefill text of the batch and the final
	// five-entry history.
	TEST_P(ConversationTest, SendMultipleMessagesWithHistory) {
	  // Mock session/engine pair.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  // Conversation under test.
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  // ---- Turn 1: a single user message seeds the history ----
	  JsonMessage user_message_1 = nlohmann::ordered_json::parse(R"json(
	{
	"role": "user",
	"content": "How are you?"
	}
	)json");
	  EXPECT_CALL(*session_mock, RunPrefill(testing::_))
	      .WillOnce(testing::Return(absl::OkStatus()));
	  EXPECT_CALL(*session_mock, RunDecode(testing::_))
	      .WillOnce(
	          testing::Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK(conversation->SendMessage(user_message_1));
	  ASSERT_THAT(conversation->GetHistory().size(), testing::Eq(2));

	  // ---- Turn 2: two consecutive user messages on a non-empty history ----
	  JsonMessage user_messages = nlohmann::ordered_json::parse(R"json(
	[
	{
	"role": "user",
	"content": "foo"
	},
	{
	"role": "user",
	"content": "bar"
	}
	]
	)json");
	  // Both messages are expected in one prefill input, rendered back to back.
	  absl::string_view batch_prompt =
	      "<start_of_turn>user\n"
	      "foo<end_of_turn>\n"
	      "<start_of_turn>user\n"
	      "bar<end_of_turn>\n";
	  EXPECT_CALL(*session_mock,
	              RunPrefill(testing::ElementsAre(
	                  testing::VariantWith<InputText>(testing::Property(
	                      &InputText::GetRawTextString, batch_prompt)))))
	      .WillOnce(testing::Return(absl::OkStatus()));
	  EXPECT_CALL(*session_mock, RunDecode(testing::_))
	      .WillOnce(testing::Return(Responses(TaskState::kProcessing, {"baz"})));

	  ASSERT_OK(conversation->SendMessage(user_messages));

	  // ---- History: user, assistant, user, user, assistant ----
	  JsonMessage assistant_message_1 = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})");
	  JsonMessage assistant_message_2 = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "baz"
	}
	]
	})");
	  EXPECT_THAT(conversation->GetHistory(),
	              testing::ElementsAre(user_message_1, assistant_message_1,
	                                   user_messages[0], user_messages[1],
	                                   assistant_message_2));
	}

	// Exercises Conversation::RunTextScoring and RunTextScoringAsync. Each call is
	// expected to clone the session (Clone / CloneAsync) and forward the target
	// text to the clone with `store_token_lengths == true`.
	TEST_P(ConversationTest, RunTextScoring) {
	  // Set up mock Session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Create Conversation.
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // ---- Synchronous scoring ----
	  auto cloned_session_sync = std::make_unique<MockSession>();
	  EXPECT_CALL(*cloned_session_sync,
	              RunTextScoring(testing::ElementsAre("I am good."), true))
	      .WillOnce(
	          testing::Return(Responses(TaskState::kProcessing, {"I am good."})));
	  EXPECT_CALL(*mock_session_ptr, Clone())
	      .WillOnce(testing::Return(std::move(cloned_session_sync)));

	  ASSERT_OK_AND_ASSIGN(const Responses response,
	                       conversation->RunTextScoring({"I am good."}));
	  EXPECT_EQ(response.GetTexts()[0], "I am good.");

	  // ---- Asynchronous scoring ----
	  auto cloned_session_async = std::make_unique<MockSession>();
	  EXPECT_CALL(
	      *cloned_session_async,
	      RunTextScoringAsync(testing::ElementsAre("I am good."), testing::_, true))
	      .WillOnce([](const std::vector<absl::string_view>& target_text,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback,
	                   bool store_token_lengths) {
	        // Complete the request inline; no task controller is handed back.
	        callback(Responses(TaskState::kProcessing, {"I am good."}));
	        return nullptr;
	      });
	  EXPECT_CALL(*mock_session_ptr, CloneAsync(testing::_))
	      .WillOnce(testing::Return(std::move(cloned_session_async)));

	  absl::Notification done;
	  std::string response_text;
	  EXPECT_OK(conversation->RunTextScoringAsync(
	      {"I am good."}, [&](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        response_text = responses->GetTexts()[0];
	        done.Notify();
	      }));
	  // Abort on timeout: the callback captures `done` and `response_text` by
	  // reference, so the test must not run on (and unwind) before it has fired.
	  ASSERT_TRUE(done.WaitForNotificationWithTimeout(absl::Seconds(10)));
	  EXPECT_EQ(response_text, "I am good.");
	}

	// End-to-end async send against the engine created from the test model
	// (CPU backend, no cache, at most 10 tokens).
	TEST_P(ConversationTest, SendMessageAsync) {
	  ASSERT_OK_AND_ASSIGN(auto model_assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto engine_settings, EngineSettings::CreateDefault(
	                                                 model_assets, Backend::CPU));
	  engine_settings.GetMutableMainExecutorSettings().SetCacheDir(":nocache");
	  engine_settings.GetMutableMainExecutorSettings().SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(engine_settings));
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  JsonMessage user_message = {{"role", "user"}, {"content", "Hello world!"}};
	  // The expected message is just some gibberish text, because the test LLM has
	  // random weights.
	  Message expected_message =
	      JsonMessage({{"role", "assistant"},
	                   {"content",
	                    {{{"type", "text"},
	                      {"text", "TarefaByte دارایेत्र investigaciónప్రదేశ"}}}}});
	  // Separate copy for the history check below.
	  // NOTE(review): presumably the callback helper consumes or mutates
	  // `expected_message` -- confirm against CreateTestMessageCallback.
	  Message expected_message_for_confirm = expected_message;

	  absl::Notification done;
	  EXPECT_OK(conversation->SendMessageAsync(
	      user_message, CreateTestMessageCallback(expected_message, done)));
	  // Wait for the async message to be processed.
	  EXPECT_OK(engine->WaitUntilDone(absl::Seconds(100)));
	  // Abort on timeout instead of inspecting the history (and destroying `done`)
	  // while the callback may still be pending.
	  ASSERT_TRUE(done.WaitForNotificationWithTimeout(absl::Seconds(10)));
	  EXPECT_THAT(conversation->GetHistory(),
	              testing::ElementsAre(user_message, expected_message_for_confirm));
	}

	// Async send of a single user message against a mock session: checks the
	// rendered prefill text, the streamed assistant reply and the resulting
	// history.
	TEST_P(ConversationTest, SendSingleMessageAsync) {
	  // Set up mock Session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Create Conversation.
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // We will send a single message.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};

	  absl::string_view expected_input_text =
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";
	  // Prefill completes inline with a kDone response.
	  EXPECT_CALL(
	      *mock_session_ptr,
	      RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	                          testing::Property(&InputText::GetRawTextString,
	                                            expected_input_text))),
	                      testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });
	  // Decode streams one text chunk and then signals kDone.
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(Responses(TaskState::kProcessing, {"I am good."}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  Message assistant_message = JsonMessage(nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})"));
	  // Separate copy for the history check below.
	  Message assistant_message_for_confirm = assistant_message;
	  absl::Notification done;
	  auto message_callback = CreateTestMessageCallback(assistant_message, done);
	  EXPECT_OK(conversation->SendMessageAsync(user_message,
	                                           std::move(message_callback)));
	  // Abort on timeout rather than checking history with the callback pending.
	  ASSERT_TRUE(done.WaitForNotificationWithTimeout(absl::Seconds(10)));

	  EXPECT_THAT(
	      conversation->GetHistory(),
	      testing::ElementsAre(user_message, assistant_message_for_confirm));
	}

	// Async send with a custom "thought" channel delimited by <think>...</think>.
	// The streamed callback is expected to receive three partial messages (text,
	// channel, text); the history is expected to hold one merged assistant
	// message.
	TEST_P(ConversationTest, SendMessageAsyncWithChannelContent) {
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  std::vector<Channel> custom_channels = {{"thought", "<think>", "</think>"}};
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetChannels(custom_channels)
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};

	  absl::string_view expected_input_text =
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";
	  EXPECT_CALL(
	      *mock_session_ptr,
	      RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	                          testing::Property(&InputText::GetRawTextString,
	                                            expected_input_text))),
	                      testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });

	  // One decoded chunk that embeds channel content between plain text.
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(Responses(TaskState::kProcessing,
	                                    {"Hello <think>hmm</think> World!"}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  absl::Notification done;

	  // Partial messages expected by the callback, in order.
	  std::vector<Message> expected_messages = {
	      JsonMessage{{"role", "assistant"},
	                  {"content", {{{"type", "text"}, {"text", "Hello "}}}}},
	      JsonMessage{{"role", "assistant"}, {"channels", {{"thought", "hmm"}}}},
	      JsonMessage{{"role", "assistant"},
	                  {"content", {{{"type", "text"}, {"text", " World!"}}}}},
	  };
	  auto message_callback =
	      CreateTestMultiMessageCallback(expected_messages, done);
	  EXPECT_OK(conversation->SendMessageAsync(user_message,
	                                           std::move(message_callback)));
	  // Abort on timeout rather than checking history with the callback pending.
	  ASSERT_TRUE(done.WaitForNotificationWithTimeout(absl::Seconds(10)));

	  // Verify the final message in history.
	  JsonMessage expected_assistant_message = nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "Hello World!"
	}
	],
	"channels": {
	"thought": "hmm"
	}
	})");

	  EXPECT_THAT(conversation->GetHistory(),
	              testing::ElementsAre(user_message, expected_assistant_message));
	}

	// Async send with `OptionalArgs::extra_context`: the prompt template branches
	// on `enable_thinking`, and the expected prefill text is the "Thinking
	// enabled." variant.
	TEST_P(ConversationTest, SendSingleMessageAsyncWithExtraContext) {
	  // Set up mock Session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Create Conversation and overwrite prompt template.
	  absl::string_view prompt_template = R"jinja(
	{%- if enable_thinking -%}
	<start_of_turn>system
	Thinking enabled.<end_of_turn>
	{% else %}
	<start_of_turn>system
	Thinking disabled.<end_of_turn>
	{%- endif -%}
	{%- for message in messages -%}
	{{- '<start_of_turn>' + message.role + '\n' -}}
	{%- if message.content is string -%}
	{{- message.content + '<end_of_turn>\n' -}}
	{%- else -%}
	{{- message.content[0].text + '<end_of_turn>\n' -}}
	{%- endif -%}
	{%- endfor -%}
	)jinja";

	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(prompt_template))
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // We will send a single message.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};
	  OptionalArgs optional_args;
	  optional_args.extra_context = absl::flat_hash_map<std::string, std::string>{
	      {"enable_thinking", "true"}};

	  absl::string_view expected_input_text =
	      "<start_of_turn>system\nThinking enabled.<end_of_turn>\n"
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";

	  EXPECT_CALL(
	      *mock_session_ptr,
	      RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	                          testing::Property(&InputText::GetRawTextString,
	                                            expected_input_text))),
	                      testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(
	                Responses(TaskState::kProcessing, {"I am good async."}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  Message assistant_message = JsonMessage(nlohmann::ordered_json::parse(R"({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good async."
	}
	]
	})"));
	  // Separate copy for the history check below.
	  Message assistant_message_for_confirm = assistant_message;
	  absl::Notification done;
	  auto message_callback = CreateTestMessageCallback(assistant_message, done);
	  EXPECT_OK(conversation->SendMessageAsync(
	      user_message, std::move(message_callback), std::move(optional_args)));
	  // Abort on timeout rather than checking history with the callback pending.
	  ASSERT_TRUE(done.WaitForNotificationWithTimeout(absl::Seconds(10)));

	  EXPECT_THAT(
	      conversation->GetHistory(),
	      testing::ElementsAre(user_message, assistant_message_for_confirm));
	}

	// Async send of a JSON array holding two user messages on an empty history:
	// both are expected in a single prefill input, and the history is expected to
	// hold the two user messages followed by the assistant reply.
	TEST_P(ConversationTest, SendMultipleMessagesAsync) {
	  // Set up mock Session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Create Conversation.
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // We will send two consecutive messages.
	  JsonMessage user_messages = nlohmann::ordered_json::parse(R"json(
	[
	{
	"role": "user",
	"content": "Hello world!"
	},
	{
	"role": "user",
	"content": "How are you?"
	}
	]
	)json");

	  absl::string_view expected_input_text =
	      "<start_of_turn>user\n"
	      "Hello world!<end_of_turn>\n"
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";
	  EXPECT_CALL(
	      *mock_session_ptr,
	      RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	                          testing::Property(&InputText::GetRawTextString,
	                                            expected_input_text))),
	                      testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(Responses(TaskState::kProcessing, {"I am good."}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  Message assistant_message = JsonMessage(nlohmann::ordered_json::parse(R"json({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})json"));
	  // Separate copy for the history check below.
	  Message assistant_message_for_confirm = assistant_message;
	  absl::Notification done;
	  auto message_callback = CreateTestMessageCallback(assistant_message, done);
	  EXPECT_OK(conversation->SendMessageAsync(user_messages,
	                                           std::move(message_callback)));
	  // Abort on timeout rather than checking history with the callback pending.
	  ASSERT_TRUE(done.WaitForNotificationWithTimeout(absl::Seconds(10)));

	  EXPECT_THAT(conversation->GetHistory(),
	              testing::ElementsAre(user_messages[0], user_messages[1],
	                                   assistant_message_for_confirm));
	}

	// Async two-turn exchange with SetFilterChannelContentFromKvCache(true).
	// The mock expectations require a checkpoint save on each turn, a rewind to
	// that checkpoint on the second turn, and a second prefill that replays the
	// assistant reply without the "thought" channel text.
	TEST_P(ConversationTest, SendMessageAsyncWithChannelContentFiltering) {
	  // Set up mock Session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Create Conversation with channel content filtering enabled.
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetFilterChannelContentFromKvCache(true)
	          .SetChannels({litert::lm::Channel{
	              .channel_name = "thought",
	              .start = "<\|channel>thought\n",
	              .end = "<channel\|>",
	          }})
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // Helper to get the raw text string from `InputText`.
	  auto get_text = [](const InputText& it) -> std::string {
	    auto status_or_view = it.GetRawTextString();
	    if (!status_or_view.ok()) return "";
	    return std::string(*status_or_view);
	  };

	  // Builds a message callback that signals `done` either on an error status
	  // or when a null JSON message arrives; every other message is ignored.
	  auto notify_when_finished = [](absl::Notification& done) {
	    return [&done](absl::StatusOr<Message> message) {
	      if (!message.ok()) {
	        done.Notify();
	        return;
	      }
	      if (auto* json_msg = std::get_if<JsonMessage>(&message.value())) {
	        if (json_msg->is_null()) {
	          done.Notify();
	        }
	      }
	    };
	  };

	  // ---- Turn 1 ----
	  // Expect prefill of first user message.
	  EXPECT_CALL(*mock_session_ptr, RunPrefillAsync(testing::_, testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });

	  // Expect checkpoint to be saved.
	  EXPECT_CALL(*mock_session_ptr, SaveCheckpoint("channel_content_checkpoint"))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect decode after first user message; the reply carries channel content.
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(
	                Responses(TaskState::kProcessing,
	                          {"<\|channel>thought\nhmm<channel\|>I am good."}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  // Send the first user message and wait for its completion signal.
	  JsonMessage user_message_1 = {{"role", "user"}, {"content", "How are you?"}};
	  absl::Notification done_1;
	  EXPECT_OK(conversation->SendMessageAsync(user_message_1,
	                                           notify_when_finished(done_1)));
	  ASSERT_TRUE(done_1.WaitForNotificationWithTimeout(absl::Seconds(10)));

	  // ---- Turn 2 ----
	  JsonMessage user_message_2 = {{"role", "user"}, {"content", "That's great."}};
	  absl::Notification done_2;

	  // Expect rewind to checkpoint when second user message is sent.
	  EXPECT_CALL(*mock_session_ptr,
	              RewindToCheckpoint("channel_content_checkpoint"))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect the previous assistant message and the new user message to be
	  // prefilled asynchronously. The previous assistant message should not contain
	  // channel content.
	  auto assistant_message_matcher =
	      AllOf(HasSubstr("I am good."), Not(HasSubstr("hmm")));
	  auto message_input_matcher = ElementsAre(
	      VariantWith<InputText>(ResultOf(get_text, assistant_message_matcher)),
	      VariantWith<InputText>(ResultOf(get_text, HasSubstr("That's great."))));
	  EXPECT_CALL(*mock_session_ptr,
	              RunPrefillAsync(message_input_matcher, testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });

	  // Expect a new checkpoint to be saved before decode of second turn.
	  EXPECT_CALL(*mock_session_ptr, SaveCheckpoint("channel_content_checkpoint"))
	      .WillOnce(Return(absl::OkStatus()));

	  // Expect decode after second user message.
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(Responses(TaskState::kProcessing, {"Indeed."}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  // Send the second user message and wait for its completion signal.
	  EXPECT_OK(conversation->SendMessageAsync(user_message_2,
	                                           notify_when_finished(done_2)));
	  ASSERT_TRUE(done_2.WaitForNotificationWithTimeout(absl::Seconds(10)));
	}

	// Async counterpart of the "multiple messages with history" scenario: one
	// user message seeds the history, then a JSON array holding two user messages
	// is sent; checks each prefill text and the final five-entry history.
	TEST_P(ConversationTest, SendMultipleMessagesAsyncWithHistory) {
	  // Set up mock Session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));

	  // Create Conversation.
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // ---- Turn 1: the first user message ----
	  JsonMessage user_message_1 = nlohmann::ordered_json::parse(R"json(
	{
	"role": "user",
	"content": "How are you?"
	}
	)json");
	  absl::string_view expected_input_text1 =
	      "<start_of_turn>user\n"
	      "How are you?<end_of_turn>\n";
	  EXPECT_CALL(
	      *mock_session_ptr,
	      RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	                          testing::Property(&InputText::GetRawTextString,
	                                            expected_input_text1))),
	                      testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(Responses(TaskState::kProcessing, {"I am good."}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  Message assistant_message_1 =
	      JsonMessage(nlohmann::ordered_json::parse(R"json({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "I am good."
	}
	]
	})json"));
	  // Separate copy for the history check at the end.
	  Message assistant_message_1_for_confirm = assistant_message_1;

	  absl::Notification done_1;
	  EXPECT_OK(conversation->SendMessageAsync(
	      user_message_1, CreateTestMessageCallback(assistant_message_1, done_1)));
	  // Abort on timeout: the second turn must not start while the first callback
	  // is still pending.
	  ASSERT_TRUE(done_1.WaitForNotificationWithTimeout(absl::Seconds(10)));
	  ASSERT_THAT(conversation->GetHistory().size(), testing::Eq(2));

	  // ---- Turn 2: two consecutive messages when the history is not empty ----
	  JsonMessage user_messages = nlohmann::ordered_json::parse(R"json(
	[
	{
	"role": "user",
	"content": "foo"
	},
	{
	"role": "user",
	"content": "bar"
	}
	]
	)json");

	  absl::string_view expected_input_text2 =
	      "<start_of_turn>user\n"
	      "foo<end_of_turn>\n"
	      "<start_of_turn>user\n"
	      "bar<end_of_turn>\n";
	  EXPECT_CALL(
	      *mock_session_ptr,
	      RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	                          testing::Property(&InputText::GetRawTextString,
	                                            expected_input_text2))),
	                      testing::_))
	      .WillOnce([](const std::vector<InputData>& contents,
	                   absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                       user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return nullptr;
	      });
	  EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	             const DecodeConfig& decode_config) {
	            user_callback(Responses(TaskState::kProcessing, {"baz"}));
	            user_callback(Responses(TaskState::kDone));
	            return nullptr;
	          });

	  Message assistant_message_2 =
	      JsonMessage(nlohmann::ordered_json::parse(R"json({
	"role": "assistant",
	"content": [
	{
	"type": "text",
	"text": "baz"
	}
	]
	})json"));
	  // Separate copy for the history check at the end.
	  Message assistant_message_2_for_confirm = assistant_message_2;

	  absl::Notification done_2;
	  auto message_callbacks_2 =
	      CreateTestMessageCallback(assistant_message_2, done_2);
	  EXPECT_OK(conversation->SendMessageAsync(user_messages,
	                                           std::move(message_callbacks_2)));
	  // Abort on timeout rather than checking history with the callback pending.
	  ASSERT_TRUE(done_2.WaitForNotificationWithTimeout(absl::Seconds(10)));

	  EXPECT_THAT(
	      conversation->GetHistory(),
	      testing::ElementsAre(user_message_1, assistant_message_1_for_confirm,
	                           user_messages[0], user_messages[1],
	                           assistant_message_2_for_confirm));
	}

	// Synchronous send against the engine created from the test model, with a
	// system-message preface. The expected reply text differs depending on
	// `prefill_preface_on_init_`.
	TEST_P(ConversationTest, SendMessageWithPreface) {
	  // Engine on the CPU backend, no cache, at most 15 tokens.
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  auto& executor_settings = settings.GetMutableMainExecutorSettings();
	  executor_settings.SetCacheDir(":nocache");
	  executor_settings.SetMaxNumTokens(15);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  // Conversation with a one-message system preface.
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetPreface(JsonPreface{
	              .messages = {{{"role", "system"},
	                            {"content", "You are a helpful assistant."}}}})
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  ASSERT_OK_AND_ASSIGN(const Message reply,
	                       conversation->SendMessage(JsonMessage{
	                           {"role", "user"}, {"content", "Hello world!"}}));

	  // The expected message is just some gibberish text, because the test LLM has
	  // random weights.
	  const std::string expected_text =
	      prefill_preface_on_init_ ? " rupani rupani rupani echoes echoes"
	                               : " noses</caption> গ্রাহ<unused5296> omp";
	  const JsonMessage expected_message = {
	      {"role", "assistant"},
	      {"content", {{{"type", "text"}, {"text", expected_text}}}}};
	  EXPECT_EQ(std::get<JsonMessage>(reply), expected_message);
	}

	// Checks that GetBenchmarkInfo().GetTotalPrefillTurns() grows by one per sent
	// message, starting one higher when `prefill_preface_on_init_` is set.
	TEST_P(ConversationTest, GetBenchmarkInfo) {
	  // Engine on the CPU backend, no cache, at most 15 tokens, with default
	  // benchmark params assigned.
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  settings.GetMutableMainExecutorSettings().SetCacheDir(":nocache");
	  settings.GetMutableMainExecutorSettings().SetMaxNumTokens(15);
	  proto::BenchmarkParams default_benchmark_params;
	  settings.GetMutableBenchmarkParams() = default_benchmark_params;
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));

	  // Conversation with a one-message system preface.
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetPreface(JsonPreface{
	              .messages = {{{"role", "system"},
	                            {"content", "You are a helpful assistant."}}}})
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  // First message: one prefill turn, plus one more when the preface was
	  // prefilled on init.
	  ASSERT_OK(conversation->SendMessage(
	      JsonMessage{{"role", "user"}, {"content", "Hello world!"}}));
	  ASSERT_OK_AND_ASSIGN(const BenchmarkInfo info_after_first,
	                       conversation->GetBenchmarkInfo());
	  EXPECT_EQ(info_after_first.GetTotalPrefillTurns(),
	            prefill_preface_on_init_ ? 2 : 1);

	  // Second message adds exactly one more prefill turn.
	  ASSERT_OK(conversation->SendMessage(
	      JsonMessage{{"role", "user"}, {"content", "Hello world!"}}));
	  ASSERT_OK_AND_ASSIGN(const BenchmarkInfo info_after_second,
	                       conversation->GetBenchmarkInfo());
	  EXPECT_EQ(info_after_second.GetTotalPrefillTurns(),
	            prefill_preface_on_init_ ? 3 : 2);
	}

	// Sends a message asynchronously under task group "group1" and then cancels
	// the group; Cancel() is expected exactly once on each of the two task
	// controllers handed back by the mocked prefill and decode calls.
	TEST_P(ConversationTest, CancelGroupWithSendMessageAsync) {
	  // Mock session/engine pair.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  // Conversation under test.
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  // A single user message is sent.
	  JsonMessage user_message = {{"role", "user"}, {"content", "How are you?"}};

	  // Controller returned by the prefill call; CancelGroup("group1") must
	  // cancel it.
	  auto prefill_controller = std::make_unique<MockTaskController>();
	  EXPECT_CALL(*prefill_controller, Cancel())
	      .WillOnce(testing::Return(absl::OkStatus()));
	  // Controller returned by the decode call; CancelGroup("group1") must cancel
	  // it as well.
	  auto decode_controller = std::make_unique<MockTaskController>();
	  EXPECT_CALL(*decode_controller, Cancel())
	      .WillOnce(testing::Return(absl::OkStatus()));

	  // Prefill reports kDone inline and hands back the first controller.
	  EXPECT_CALL(*session_mock, RunPrefillAsync(testing::_, testing::_))
	      .WillOnce([&prefill_controller](
	                    const std::vector<InputData>& contents,
	                    absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                        user_callback) {
	        user_callback(Responses(TaskState::kDone));
	        return std::move(prefill_controller);
	      });
	  // Decode never invokes its callback; it only hands back the second
	  // controller.
	  EXPECT_CALL(*session_mock, RunDecodeAsync(testing::_, testing::_))
	      .WillOnce(
	          [&decode_controller](
	              absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	              const DecodeConfig& decode_config) {
	            return std::move(decode_controller);
	          });

	  // NOTE(review): neither variable is inspected after the send; they only
	  // record whatever the message callback reports.
	  absl::Notification callback_fired;
	  absl::Status callback_status;
	  EXPECT_OK(conversation->SendMessageAsync(
	      user_message,
	      [&callback_status, &callback_fired](absl::StatusOr<Message> message) {
	        callback_status = message.status();
	        callback_fired.Notify();
	      },
	      {.task_group_id = "group1"}));

	  conversation->CancelGroup("group1");
	}

	// Checks that CancelGroup() invokes Cancel() on the task controller returned
	// by RunTextScoringAsync() on the cloned session when the scoring call was
	// tagged with the same task group id.
	TEST_P(ConversationTest, CancelGroupWithRunTextScoringAsync) {
	  // Primary mock session that the engine will hand to the conversation.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();

	  // Session produced by CloneAsync(); it reports the fixture's session config.
	  auto scoring_session = std::make_unique<MockSession>();
	  MockSession* scoring_session_mock = scoring_session.get();
	  EXPECT_CALL(*scoring_session_mock, GetSessionConfig())
	      .WillRepeatedly(testing::ReturnRef(session_config_));

	  // The primary session is cloned exactly once and yields the scoring session.
	  EXPECT_CALL(*session_mock, CloneAsync(testing::_))
	      .WillOnce(testing::Return(std::move(scoring_session)));
	  auto engine = CreateMockEngine(std::move(session));
	  EXPECT_CALL(*engine, GetTokenizer())
	      .WillRepeatedly(testing::ReturnRef(*tokenizer_));

	  // Build the conversation under test.
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  // Controller handed out for the scoring task; CancelGroup("group1") must
	  // cancel it exactly once.
	  auto scoring_controller = std::make_unique<MockTaskController>();
	  EXPECT_CALL(*scoring_controller, Cancel())
	      .WillOnce(testing::Return(absl::OkStatus()));

	  // Scoring runs on the cloned session, never calls its callback, and yields
	  // the scoring controller.
	  EXPECT_CALL(*scoring_session_mock,
	              RunTextScoringAsync(testing::ElementsAre("I am good."),
	                                  testing::_, true))
	      .WillOnce([&scoring_controller](
	                    const std::vector<absl::string_view>& /*target_text*/,
	                    absl::AnyInvocable<void(absl::StatusOr<Responses>)>
	                    /*callback*/,
	                    bool /*store_token_lengths*/) {
	        return std::move(scoring_controller);
	      });

	  absl::Notification finished;
	  std::string scored_text;
	  EXPECT_OK(conversation->RunTextScoringAsync(
	      {"I am good."},
	      [&finished, &scored_text](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        scored_text = responses->GetTexts()[0];
	        finished.Notify();
	      },
	      {.task_group_id = "group1"}));

	  conversation->CancelGroup("group1");
	}

	// Instantiates ConversationTest for every parameter set returned by
	// GetTestParams(). The generated test-name suffix is
	// "<Constrained|Free>_<PrefillOnInit|NoPrefillOnInit>".
	INSTANTIATE_TEST_SUITE_P(
	    ConversationTest, ConversationTest,
	    testing::ValuesIn(ConversationTest::GetTestParams()),
	    [](const testing::TestParamInfo<ConversationTestParams>& info) {
	      const absl::string_view decoding_tag =
	          info.param.enable_constrained_decoding ? "Constrained" : "Free";
	      const absl::string_view prefill_tag =
	          info.param.prefill_preface_on_init ? "PrefillOnInit"
	                                             : "NoPrefillOnInit";
	      return absl::StrCat(decoding_tag, "_", prefill_tag);
	    });

	// Builds a message callback for the cancellation tests.
	//
	// The returned callback holds references to `status` and `done`, so both must
	// outlive every invocation of the callback.
	//
	// Behavior per invocation:
	//   * error status: stores it in `status` and notifies `done`.
	//   * null JsonMessage: stores OkStatus in `status` and notifies `done`; this
	//     callback treats the null message as the terminal callback.
	//   * anything else: sleeps 100 ms to slow decoding down so that a
	//     cancellation can land in the middle of it.
	absl::AnyInvocable<void(absl::StatusOr<Message>)>
	CreateCancelledMessageCallback(absl::Status& status, absl::Notification& done) {
	  return [&status, &done](absl::StatusOr<Message> message) mutable {
	    if (!message.ok()) {
	      status = message.status();
	      done.Notify();
	      return;
	    }
	    // std::get_if yields nullptr when the variant holds a different
	    // alternative, so guard the pointer before dereferencing it.
	    const auto* json_message = std::get_if<JsonMessage>(&message.value());
	    if (json_message != nullptr && json_message->is_null()) {
	      status = absl::OkStatus();
	      done.Notify();
	      return;
	    }
	    // Wait for a short time to slow down the decoding process, so that the
	    // cancellation can be triggered in the middle of decoding.
	    absl::SleepFor(absl::Milliseconds(100));
	  };
	}

	// Runs one turn against a real CPU engine and checks that GetHistory() (copy)
	// and AccessHistory() (visitor over the stored history) both expose the
	// expected assistant message as the last history entry.
	TEST(ConversationAccessHistoryTest, AccessHistory) {
	  // Bring up an engine with caching disabled and at most 10 tokens.
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  settings.GetMutableMainExecutorSettings().SetCacheDir(":nocache");
	  settings.GetMutableMainExecutorSettings().SetMaxNumTokens(10);
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(settings));
	  ASSERT_OK_AND_ASSIGN(auto conversation_config,
	                       ConversationConfig::CreateDefault(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, conversation_config));

	  // One user turn and the assistant reply expected for it.
	  JsonMessage prompt = {{"role", "user"}, {"content", "Hello world!"}};
	  Message expected_reply =
	      JsonMessage({{"role", "assistant"},
	                   {"content",
	                    {{{"type", "text"},
	                      {"text", "TarefaByte دارایेत्र investigaciónప్రదేశ"}}}}});
	  // Separate copy used for the history checks below; the original is handed
	  // to the test callback.
	  Message expected_reply_snapshot = expected_reply;
	  absl::Notification reply_done;
	  EXPECT_OK(conversation->SendMessageAsync(
	      prompt, CreateTestMessageCallback(expected_reply, reply_done)));
	  reply_done.WaitForNotificationWithTimeout(absl::Seconds(10));

	  const JsonMessage& expected_json =
	      std::get<JsonMessage>(expected_reply_snapshot);

	  // Path 1: copy of the whole history.
	  auto history_copy = conversation->GetHistory();
	  ASSERT_EQ(history_copy.size(), 2);
	  ASSERT_THAT(history_copy.back(),
	              testing::VariantWith<JsonMessage>(expected_json));

	  // Path 2: visitor access. Only the last entry is copied out, so the full
	  // history does not need to be duplicated.
	  Message tail_message;
	  conversation->AccessHistory(
	      [&tail_message](const std::vector<Message>& history_view) {
	        tail_message = history_view.back();
	      });
	  EXPECT_THAT(tail_message,
	              testing::VariantWith<JsonMessage>(expected_json));
	}

	// Fixture for the cancellation tests. The bool test parameter is captured at
	// construction time and tells the test whether to set benchmark params on the
	// engine settings.
	class ConversationCancellationTest : public testing::TestWithParam<bool> {
	 public:
	  ConversationCancellationTest() : use_benchmark_info_(GetParam()) {}

	 protected:
	  bool use_benchmark_info_;
	};

	// End-to-end cancellation test against a real CPU engine:
	//   1. Start an async message, cancel it mid-flight, and expect the callback
	//      to report kCancelled and the history to stay empty.
	//   2. Resend the same message and expect it to complete, leaving a user and
	//      an assistant entry in the history.
	//   3. Call CancelProcess() again with nothing running and expect the history
	//      to be unchanged.
	// When the test parameter is true, benchmark params are set on the engine
	// settings first.
	TEST_P(ConversationCancellationTest, CancelProcessWithBenchmarkInfo) {
	  ASSERT_OK_AND_ASSIGN(auto model_assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto engine_settings, EngineSettings::CreateDefault(
	                                                 model_assets, Backend::CPU));
	  engine_settings.GetMutableMainExecutorSettings().SetCacheDir(":nocache");
	  // Set a large max num tokens to ensure the decoding is not finished before
	  // cancellation.
	  engine_settings.GetMutableMainExecutorSettings().SetMaxNumTokens(20);
	  if (use_benchmark_info_) {
	    proto::BenchmarkParams benchmark_params;
	    engine_settings.GetMutableBenchmarkParams() = benchmark_params;
	  }
	  ASSERT_OK_AND_ASSIGN(auto engine, EngineFactory::CreateAny(engine_settings));
	  ASSERT_OK_AND_ASSIGN(auto config, ConversationConfig::CreateDefault(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  absl::Status status;
	  absl::Notification done_1;
	  conversation
	      ->SendMessageAsync(
	          JsonMessage{{"role", "user"}, {"content", "Hello world!"}},
	          CreateCancelledMessageCallback(status, done_1))
	      .IgnoreError();
	  // Wait for a short time to ensure the decoding has started.
	  absl::SleepFor(absl::Milliseconds(100));
	  conversation->CancelProcess();
	  // Wait for the callback to be done. A timeout is reported explicitly instead
	  // of being silently ignored.
	  EXPECT_TRUE(done_1.WaitForNotificationWithTimeout(absl::Seconds(10)));
	  EXPECT_THAT(status, testing::status::StatusIs(absl::StatusCode::kCancelled));

	  // The history should be empty after cancellation.
	  EXPECT_THAT(conversation->GetHistory().size(), 0);

	  // Resend the message after cancellation, and it should succeed.
	  status = absl::OkStatus();
	  absl::Notification done_2;
	  conversation
	      ->SendMessageAsync(
	          JsonMessage{{"role", "user"}, {"content", "Hello world!"}},
	          CreateCancelledMessageCallback(status, done_2))
	      .IgnoreError();
	  // Wait for the callback to be done before reading `status`: the callback
	  // writes it, so checking earlier would both race with that write and
	  // verify nothing about the outcome.
	  EXPECT_TRUE(done_2.WaitForNotificationWithTimeout(absl::Seconds(10)));
	  EXPECT_OK(status);
	  // Without cancellation, the history should have two messages, user and
	  // assistant.
	  auto history = conversation->GetHistory();
	  ASSERT_EQ(history.size(), 2);
	  EXPECT_THAT(history[0], testing::VariantWith<JsonMessage>(JsonMessage{
	                              {"role", "user"}, {"content", "Hello world!"}}));
	  // TODO(b/450903294) - Because the cancellation is not fully rollbacked, the
	  // assistant message content depends on at which step the cancellation is
	  // triggered, and that is non-deterministic. Here we only check the role is
	  // assistant.
	  EXPECT_THAT(std::holds_alternative<JsonMessage>(history[1]),
	              testing::IsTrue());
	  EXPECT_EQ(std::get<JsonMessage>(history[1])["role"], "assistant");

	  conversation->CancelProcess();
	  // No op after cancellation again.
	  EXPECT_THAT(conversation->GetHistory().size(), 2);
	}

	// Instantiates ConversationCancellationTest over both values of its bool
	// parameter (testing::Bool()); PrintToStringParamName() derives the test-name
	// suffix from the parameter value.
	INSTANTIATE_TEST_SUITE_P(ConversationCancellationTest,
	ConversationCancellationTest, testing::Bool(),
	testing::PrintToStringParamName());

	// gMock implementation of the Constraint interface. Tests in this file only
	// pass it around by pointer to check that a caller-supplied constraint is
	// forwarded; no expectations are set on its methods here.
	class MockConstraint : public Constraint {
	 public:
	  // Trivial concrete State that a test can hand back from the mocked methods.
	  class MockState : public State {
	   public:
	    ~MockState() override = default;
	  };

	  // State transitions.
	  MOCK_METHOD(std::unique_ptr<State>, Start, (), (const, override));
	  MOCK_METHOD(absl::StatusOr<std::unique_ptr<State>>, ComputeNext,
	              (const State& state, int token), (const, override));
	  MOCK_METHOD(bool, IsEnded, (const State& state), (const, override));

	  // Vocabulary queries.
	  MOCK_METHOD(absl::StatusOr<std::unique_ptr<Bitmap>>, ComputeBitmap,
	              (const State& state), (const, override));
	  MOCK_METHOD(int, GetVocabularySize, (), (const, override));
	};

	// Checks that a constraint supplied through the `decoding_constraint` option
	// of SendMessage() is the exact object (same pointer) that the DecodeConfig
	// passed to Session::RunDecode() exposes via GetConstraint().
	TEST_P(ConversationTest, SendMessageWithConstraint) {
	  // The engine takes ownership of the mock session; keep a raw handle so that
	  // expectations can still be installed on it.
	  auto session = CreateMockSession();
	  MockSession* session_mock = session.get();
	  auto engine = CreateMockEngine(std::move(session));

	  // The conversation is configured with an ExternalConstraintConfig provider.
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetConstraintProviderConfig(ExternalConstraintConfig())
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  // Remember the constraint's address before ownership moves into the arg.
	  auto owned_constraint = std::make_unique<MockConstraint>();
	  Constraint* expected_constraint = owned_constraint.get();
	  ExternalConstraintArg external_arg;
	  external_arg.constraint = std::move(owned_constraint);

	  JsonMessage prompt = {{"role", "user"}, {"content", "How are you?"}};

	  EXPECT_CALL(*session_mock, RunPrefill(testing::_))
	      .WillOnce(testing::Return(absl::OkStatus()));

	  // RunDecode must receive a DecodeConfig that carries the same constraint.
	  EXPECT_CALL(*session_mock,
	              RunDecode(testing::Property(&DecodeConfig::GetConstraint,
	                                          expected_constraint)))
	      .WillOnce(
	          testing::Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK_AND_ASSIGN(
	      const Message reply,
	      conversation->SendMessage(
	          prompt, {
	                      .decoding_constraint = std::move(external_arg),
	                  }));
	}

	// Checks Conversation::Clone():
	//   * the clone is built from Session::Clone() of the original session,
	//   * the clone starts with a copy of the original history,
	//   * messages sent on the clone go to the cloned session and extend only the
	//     clone's history.
	TEST_P(ConversationTest, Clone) {
	  // Set up mock Session.
	  auto mock_session = CreateMockSession();
	  MockSession* mock_session_ptr = mock_session.get();
	  auto mock_engine = CreateMockEngine(std::move(mock_session));
	  EXPECT_CALL(*mock_engine, GetTokenizer())
	      .WillRepeatedly(testing::ReturnRef(*tokenizer_));

	  // Create Conversation.
	  ASSERT_OK_AND_ASSIGN(
	      auto conversation_config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(session_config_)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .SetEnableConstrainedDecoding(enable_constrained_decoding_)
	          .SetPrefillPrefaceOnInit(prefill_preface_on_init_)
	          .Build(*mock_engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*mock_engine, conversation_config));

	  // Send a message to populate history.
	  JsonMessage user_message = {{"role", "user"}, {"content", "Hello"}};
	  EXPECT_CALL(*mock_session_ptr, RunPrefill(testing::_))
	      .WillOnce(testing::Return(absl::OkStatus()));
	  EXPECT_CALL(*mock_session_ptr, RunDecode(testing::_))
	      .WillOnce(testing::Return(Responses(TaskState::kProcessing, {"Hi"})));
	  ASSERT_OK(conversation->SendMessage(user_message));

	  // Expect Session::Clone to be called.
	  auto cloned_mock_session = std::make_unique<MockSession>();
	  MockSession* cloned_mock_session_ptr = cloned_mock_session.get();
	  EXPECT_CALL(*cloned_mock_session_ptr, GetSessionConfig())
	      .WillRepeatedly(testing::ReturnRef(session_config_));
	  EXPECT_CALL(*mock_session_ptr, Clone())
	      .WillOnce(testing::Return(std::move(cloned_mock_session)));

	  // Clone the conversation.
	  ASSERT_OK_AND_ASSIGN(auto cloned_conversation, conversation->Clone());

	  // Verify the history in the cloned conversation. The size check is fatal
	  // because the next line indexes into the vector; continuing with an empty
	  // history would read out of bounds instead of failing cleanly.
	  auto history = cloned_conversation->GetHistory();
	  ASSERT_EQ(history.size(), 2);
	  EXPECT_EQ(std::get<JsonMessage>(history[0]), user_message);

	  // Verify that sending a message in the cloned conversation works and uses the
	  // cloned session.
	  JsonMessage user_message2 = {{"role", "user"}, {"content", "How are you?"}};
	  EXPECT_CALL(*cloned_mock_session_ptr, RunPrefill(testing::_))
	      .WillOnce(testing::Return(absl::OkStatus()));
	  EXPECT_CALL(*cloned_mock_session_ptr, RunDecode(testing::_))
	      .WillOnce(
	          testing::Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK(cloned_conversation->SendMessage(user_message2));

	  // Verify that the original conversation is unaffected by the new message in
	  // the cloned one.
	  EXPECT_EQ(conversation->GetHistory().size(), 2);
	  EXPECT_EQ(cloned_conversation->GetHistory().size(), 4);
	}

	// Checks that the `max_output_tokens` option of SendMessage() shows up as
	// DecodeConfig::GetMaxOutputTokens() in the config given to
	// Session::RunDecode().
	TEST_P(ConversationTest, SendMessageWithMaxOutputTokens) {
	  constexpr int kMaxOutputTokens = 42;

	  // Mock session that reports a Gemma3 session config built locally.
	  auto session = std::make_unique<MockSession>();
	  MockSession* session_mock = session.get();
	  SessionConfig gemma3_config = SessionConfig::CreateDefault();
	  gemma3_config.SetStartTokenId(0);
	  gemma3_config.GetMutableStopTokenIds().push_back({1});
	  *gemma3_config.GetMutableLlmModelType().mutable_gemma3() = {};
	  EXPECT_CALL(*session_mock, GetSessionConfig())
	      .WillRepeatedly(testing::ReturnRef(gemma3_config));

	  // Mock engine that hands out the session above, the fixture tokenizer, and
	  // default CPU engine settings.
	  auto engine = std::make_unique<MockEngine>();
	  EXPECT_CALL(*engine, CreateSession(testing::_))
	      .WillOnce(testing::Return(std::move(session)));
	  EXPECT_CALL(*engine, GetTokenizer())
	      .WillRepeatedly(testing::ReturnRef(*tokenizer_));
	  ASSERT_OK_AND_ASSIGN(auto assets,
	                       ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	  ASSERT_OK_AND_ASSIGN(auto settings,
	                       EngineSettings::CreateDefault(assets, Backend::CPU));
	  EXPECT_CALL(*engine, GetEngineSettings())
	      .WillRepeatedly(testing::ReturnRef(settings));

	  // Build the conversation under test.
	  ASSERT_OK_AND_ASSIGN(
	      auto config,
	      ConversationConfig::Builder()
	          .SetSessionConfig(gemma3_config)
	          .SetOverwritePromptTemplate(PromptTemplate(kTestJinjaPromptTemplate))
	          .Build(*engine));
	  ASSERT_OK_AND_ASSIGN(auto conversation,
	                       Conversation::Create(*engine, config));

	  JsonMessage prompt = {{"role", "user"}, {"content", "How are you?"}};

	  EXPECT_CALL(*session_mock, RunPrefill(testing::_))
	      .WillOnce(testing::Return(absl::OkStatus()));

	  // RunDecode must see the caller-provided output-token limit.
	  EXPECT_CALL(*session_mock,
	              RunDecode(testing::Property(
	                  &DecodeConfig::GetMaxOutputTokens,
	                  std::make_optional(kMaxOutputTokens))))
	      .WillOnce(
	          testing::Return(Responses(TaskState::kProcessing, {"I am good."})));

	  ASSERT_OK_AND_ASSIGN(
	      const Message reply,
	      conversation->SendMessage(prompt,
	                                {.max_output_tokens = kMaxOutputTokens}));
	}

	// Exercises synchronous message appending with the Gemma3 multi-prefill
	// template. Four user messages are sent: the first three with
	// has_pending_message = true and the last with default options. For each
	// SendMessage() call the test pins the exact raw text handed to
	// Session::RunPrefill(); RunDecode() is expected only for the final call.
	TEST(AppendMessageTest, Gemma3Sync) {
	// Set up mock Session.
	auto mock_session = std::make_unique<MockSession>();
	MockSession* mock_session_ptr = mock_session.get();
	SessionConfig session_config = SessionConfig::CreateDefault();
	session_config.SetStartTokenId(0);
	session_config.GetMutableStopTokenIds().push_back({1});
	*session_config.GetMutableLlmModelType().mutable_gemma3() = {};
	session_config.SetApplyPromptTemplateInSession(false);
	EXPECT_CALL(*mock_session_ptr, GetSessionConfig())
	.WillRepeatedly(testing::ReturnRef(session_config));
	// Load the tokenizer that the mock engine returns from GetTokenizer().
	ASSERT_OK_AND_ASSIGN(
	auto tokenizer,
	SentencePieceTokenizer::CreateFromFile(
	(std::filesystem::path(::testing::SrcDir()) / kTestTokenizerPath)
	.string()));

	// Set up mock Engine.
	auto mock_engine = std::make_unique<MockEngine>();
	EXPECT_CALL(*mock_engine, CreateSession(testing::_))
	.WillOnce(testing::Return(std::move(mock_session)));
	EXPECT_CALL(*mock_engine, GetTokenizer())
	.WillRepeatedly(testing::ReturnRef(*tokenizer));
	ASSERT_OK_AND_ASSIGN(auto model_assets,
	ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	ASSERT_OK_AND_ASSIGN(auto engine_settings, EngineSettings::CreateDefault(
	model_assets, Backend::CPU));
	EXPECT_CALL(*mock_engine, GetEngineSettings())
	.WillRepeatedly(testing::ReturnRef(engine_settings));

	// Prompt template text read from the test data directory.
	std::string template_text =
	ReadFile(GetTestdataPath(kGemma3ToolsMultiPrefillTemplatePath));

	// Create Conversation.
	ASSERT_OK_AND_ASSIGN(
	auto conversation_config,
	ConversationConfig::Builder()
	.SetSessionConfig(session_config)
	.SetOverwritePromptTemplate(PromptTemplate(template_text))
	.SetPreface(JsonPreface{
	.messages = {{{"role", "system"},
	{"content", "You are a helpful assistant."}}}})
	.Build(*mock_engine));
	ASSERT_OK_AND_ASSIGN(auto conversation,
	Conversation::Create(*mock_engine, conversation_config));

	// Append the 1st message.
	// The expected prefill carries the preface system text plus the opening of
	// the user turn, without an end-of-turn marker after the user content.
	absl::string_view expected_prefill_1 =
	"<start_of_turn>user\nYou are a helpful "
	"assistant.\n\n<end_of_turn>\n<start_of_turn>user\nHello world!";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_1)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(conversation->SendMessage(
	JsonMessage{{"role", "user"}, {"content", "Hello world!"}},
	{.has_pending_message = true}));

	// Append the 2nd message.
	// Only the new content is expected in the prefill; no turn markers.
	absl::string_view expected_prefill_2 = " This is a long message.";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_2)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(conversation->SendMessage(
	JsonMessage{{"role", "user"}, {"content", " This is a long message."}},
	{.has_pending_message = true}));

	// Append the 3rd message.
	absl::string_view expected_prefill_3 = " continuing...";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_3)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(conversation->SendMessage(
	JsonMessage{{"role", "user"}, {"content", " continuing..."}},
	{.has_pending_message = true}));

	// Finish appending message.
	// Sent with default options: the expected prefill closes the user turn and
	// opens the model turn, and RunDecode() is expected exactly once.
	absl::string_view expected_prefill_4 =
	" The message is ended.<end_of_turn>\n<start_of_turn>model\n";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_4)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	EXPECT_CALL(*mock_session_ptr, RunDecode(testing::_))
	.WillOnce(
	testing::Return(Responses(TaskState::kProcessing, {"I am good."})));
	ASSERT_OK_AND_ASSIGN(
	const Message response_appending,
	conversation->SendMessage(JsonMessage{
	{"role", "user"}, {"content", " The message is ended."}}));
	}

	// Asynchronous counterpart of the append test, without a preface. Four user
	// messages are sent with has_pending_message = true, each expected to produce
	// one RunPrefillAsync() call with the exact raw text given. A fifth, empty
	// message with has_pending_message = false is expected to prefill the
	// turn-closing text and trigger RunDecodeAsync().
	TEST(AppendMessageTest, Gemma3Async) {
	// Set up mock Session.
	auto mock_session = std::make_unique<MockSession>();
	MockSession* mock_session_ptr = mock_session.get();
	SessionConfig session_config = SessionConfig::CreateDefault();
	session_config.SetStartTokenId(0);
	session_config.GetMutableStopTokenIds().push_back({1});
	*session_config.GetMutableLlmModelType().mutable_gemma3() = {};
	session_config.SetApplyPromptTemplateInSession(false);
	EXPECT_CALL(*mock_session_ptr, GetSessionConfig())
	.WillRepeatedly(testing::ReturnRef(session_config));
	// Load the tokenizer that the mock engine returns from GetTokenizer().
	ASSERT_OK_AND_ASSIGN(
	auto tokenizer,
	SentencePieceTokenizer::CreateFromFile(
	(std::filesystem::path(::testing::SrcDir()) / kTestTokenizerPath)
	.string()));

	// Set up mock Engine.
	auto mock_engine = std::make_unique<MockEngine>();
	EXPECT_CALL(*mock_engine, CreateSession(testing::_))
	.WillOnce(testing::Return(std::move(mock_session)));
	EXPECT_CALL(*mock_engine, GetTokenizer())
	.WillRepeatedly(testing::ReturnRef(*tokenizer));
	ASSERT_OK_AND_ASSIGN(auto model_assets,
	ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	ASSERT_OK_AND_ASSIGN(auto engine_settings, EngineSettings::CreateDefault(
	model_assets, Backend::CPU));
	EXPECT_CALL(*mock_engine, GetEngineSettings())
	.WillRepeatedly(testing::ReturnRef(engine_settings));

	// Prompt template text read from the test data directory.
	std::string template_text =
	ReadFile(GetTestdataPath(kGemma3ToolsMultiPrefillTemplatePath));

	// Create Conversation.
	ASSERT_OK_AND_ASSIGN(
	auto conversation_config,
	ConversationConfig::Builder()
	.SetSessionConfig(session_config)
	.SetOverwritePromptTemplate(PromptTemplate(template_text))
	.Build(*mock_engine));
	ASSERT_OK_AND_ASSIGN(auto conversation,
	Conversation::Create(*mock_engine, conversation_config));

	// Shared RunPrefillAsync action: invokes the session callback with kDone
	// before returning, and returns a null task controller.
	auto test_callback =
	[](const std::vector<InputData>& contents,
	absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback) {
	user_callback(Responses(TaskState::kDone));
	return nullptr;
	};

	// NOTE(review): the results of the WaitForNotificationWithTimeout() calls
	// below are not checked, so a timeout is not reported by the wait itself.

	// Append the 1st message.
	absl::string_view expected_prefill_1 = "<start_of_turn>user\nHello world!";
	EXPECT_CALL(
	*mock_session_ptr,
	RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	testing::Property(&InputText::GetRawTextString,
	expected_prefill_1))),
	testing::_))
	.Times(1)
	.WillOnce(test_callback);
	absl::Notification done1;
	ASSERT_OK(conversation->SendMessageAsync(
	JsonMessage{{"role", "user"}, {"content", "Hello world!"}},
	[&done1](absl::StatusOr<Message> message) { done1.Notify(); },
	{.has_pending_message = true}));
	done1.WaitForNotificationWithTimeout(absl::Seconds(3));

	// Append the 2nd message.
	absl::string_view expected_prefill_2 = " This is a long message.";
	EXPECT_CALL(
	*mock_session_ptr,
	RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	testing::Property(&InputText::GetRawTextString,
	expected_prefill_2))),
	testing::_))
	.Times(1)
	.WillOnce(test_callback);
	absl::Notification done2;
	ASSERT_OK(conversation->SendMessageAsync(
	JsonMessage{{"role", "user"}, {"content", " This is a long message."}},
	[&done2](absl::StatusOr<Message> message) { done2.Notify(); },
	{.has_pending_message = true}));
	done2.WaitForNotificationWithTimeout(absl::Seconds(3));

	// Append the 3rd message.
	absl::string_view expected_prefill_3 = " continuing...";
	EXPECT_CALL(
	*mock_session_ptr,
	RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	testing::Property(&InputText::GetRawTextString,
	expected_prefill_3))),
	testing::_))
	.Times(1)
	.WillOnce(test_callback);
	absl::Notification done3;
	ASSERT_OK(conversation->SendMessageAsync(
	JsonMessage{{"role", "user"}, {"content", " continuing..."}},
	[&done3](absl::StatusOr<Message> message) { done3.Notify(); },
	{.has_pending_message = true}));
	done3.WaitForNotificationWithTimeout(absl::Seconds(3));

	// Append the 4th message.
	// Still pending, so the expected prefill has no end-of-turn marker.
	absl::string_view expected_prefill_4 = " The message is ended.";
	EXPECT_CALL(
	*mock_session_ptr,
	RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	testing::Property(&InputText::GetRawTextString,
	expected_prefill_4))),
	testing::_))
	.Times(1)
	.WillOnce(test_callback);
	absl::Notification done4;
	EXPECT_OK(conversation->SendMessageAsync(
	JsonMessage{{"role", "user"}, {"content", " The message is ended."}},
	[&done4](absl::StatusOr<Message> message) { done4.Notify(); },
	{.has_pending_message = true}));
	done4.WaitForNotificationWithTimeout(absl::Seconds(3));

	// The 5th message triggers the decode.
	// Its expected prefill closes the user turn and opens the model turn.
	absl::string_view expected_prefill_5 =
	"<end_of_turn>\n<start_of_turn>model\n";
	EXPECT_CALL(
	*mock_session_ptr,
	RunPrefillAsync(testing::ElementsAre(testing::VariantWith<InputText>(
	testing::Property(&InputText::GetRawTextString,
	expected_prefill_5))),
	testing::_))
	.Times(1)
	.WillOnce(test_callback);
	// The decode action emits one text response followed by kDone and returns a
	// null task controller.
	EXPECT_CALL(*mock_session_ptr, RunDecodeAsync(testing::_, testing::_))
	.WillOnce(
	[](absl::AnyInvocable<void(absl::StatusOr<Responses>)> user_callback,
	const DecodeConfig& decode_config) {
	user_callback(Responses(TaskState::kProcessing, {"I am good."}));
	user_callback(Responses(TaskState::kDone));
	return nullptr;
	});
	Message expected_assistant_message =
	JsonMessage({{"role", "assistant"},
	{"content", {{{"type", "text"}, {"text", "I am good."}}}}});
	absl::Notification done5;
	// Trigger the decode by sending an empty message.
	EXPECT_OK(conversation->SendMessageAsync(
	JsonMessage{{"role", "user"}, {"content", ""}},
	CreateTestMessageCallback(expected_assistant_message, done5),
	{.has_pending_message = false}));
	done5.WaitForNotificationWithTimeout(absl::Seconds(3));
	}

	// Exercises synchronous message appending when the preface (system message
	// plus one tool declaration) is prefilled at conversation creation
	// (SetPrefillPrefaceOnInit(true)) and the appended messages switch roles:
	// user -> model (three pieces) -> tool (two pieces). For each step the test
	// pins the exact raw text handed to Session::RunPrefill(); RunDecode() is
	// expected only for the last call, which is sent with
	// has_pending_message = false.
	TEST(AppendMessageTest, Gemma3SyncPrefillPrefaceOnInitAndAlternateRoles) {
	// Set up mock Session.
	auto mock_session = std::make_unique<MockSession>();
	MockSession* mock_session_ptr = mock_session.get();
	SessionConfig session_config = SessionConfig::CreateDefault();
	session_config.SetStartTokenId(0);
	session_config.GetMutableStopTokenIds().push_back({1});
	*session_config.GetMutableLlmModelType().mutable_gemma3() = {};
	session_config.SetApplyPromptTemplateInSession(false);
	EXPECT_CALL(*mock_session_ptr, GetSessionConfig())
	.WillRepeatedly(testing::ReturnRef(session_config));
	// Load the tokenizer that the mock engine returns from GetTokenizer().
	ASSERT_OK_AND_ASSIGN(
	auto tokenizer,
	SentencePieceTokenizer::CreateFromFile(
	(std::filesystem::path(::testing::SrcDir()) / kTestTokenizerPath)
	.string()));

	// Set up mock Engine.
	auto mock_engine = std::make_unique<MockEngine>();
	EXPECT_CALL(*mock_engine, CreateSession(testing::_))
	.WillOnce(testing::Return(std::move(mock_session)));
	EXPECT_CALL(*mock_engine, GetTokenizer())
	.WillRepeatedly(testing::ReturnRef(*tokenizer));
	ASSERT_OK_AND_ASSIGN(auto model_assets,
	ModelAssets::Create(GetTestdataPath(kTestLlmPath)));
	ASSERT_OK_AND_ASSIGN(auto engine_settings, EngineSettings::CreateDefault(
	model_assets, Backend::CPU));
	EXPECT_CALL(*mock_engine, GetEngineSettings())
	.WillRepeatedly(testing::ReturnRef(engine_settings));

	// Prompt template text read from the test data directory.
	std::string template_text =
	ReadFile(GetTestdataPath(kGemma3ToolsMultiPrefillTemplatePath));

	// Init with preface.
	// This expectation is installed before the conversation is created.
	// NOTE(review): the `\|` and the absence of leading indentation inside the
	// raw string below look like text-escaping artifacts; confirm the literal
	// matches the template's rendered output byte-for-byte.
	absl::string_view expected_prefill_preface = R"(<start_of_turn>system
	def tool_name(
	x: int \| None = None,
	) -> dict:
	"""
	Args:
	x """

	<end_of_turn>
	<start_of_turn>user
	You are a helpful assistant.

	<end_of_turn>
	)";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(testing::VariantWith<InputText>(
	testing::Property(&InputText::GetRawTextString,
	expected_prefill_preface)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));

	// Create Conversation.
	ASSERT_OK_AND_ASSIGN(
	auto conversation_config,
	ConversationConfig::Builder()
	.SetSessionConfig(session_config)
	.SetOverwritePromptTemplate(PromptTemplate(template_text))
	.SetPreface(JsonPreface{
	.messages = {{{"role", "system"},
	{"content", "You are a helpful assistant."}}},
	.tools = nlohmann::ordered_json::parse(
	R"json([{
	"name": "tool_name",
	"parameters": {
	"properties": {
	"x": {
	"type": "integer"
	}
	}
	}
	}])json")})
	.SetPrefillPrefaceOnInit(true)
	.Build(*mock_engine));
	ASSERT_OK_AND_ASSIGN(auto conversation,
	Conversation::Create(*mock_engine, conversation_config));

	// Append the 1st message.
	// Expected prefill opens the user turn only; the preface is not repeated.
	absl::string_view expected_prefill_1 = "<start_of_turn>user\nHello world!";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_1)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(conversation->SendMessage(
	JsonMessage{{"role", "user"}, {"content", "Hello world!"}},
	{.has_pending_message = true}));

	// Append the 2nd message.
	// The role changes to "model": the expected prefill closes the user turn
	// and opens the model turn before the new content.
	absl::string_view expected_prefill_2 =
	"<end_of_turn>\n<start_of_turn>model\nNice to meet you.";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_2)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(conversation->SendMessage(
	JsonMessage{{"role", "model"}, {"content", "Nice to meet you."}},
	{.has_pending_message = true}));

	// Append the 3rd message.
	// Same role as the previous message: only the new content is expected.
	absl::string_view expected_prefill_3 = " How can I help you today?";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_3)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(conversation->SendMessage(
	JsonMessage{{"role", "model"}, {"content", " How can I help you today?"}},
	{.has_pending_message = true}));

	// Append the 4th message.
	absl::string_view expected_prefill_4 = " The message is ended.";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_4)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(conversation->SendMessage(
	JsonMessage{{"role", "model"}, {"content", " The message is ended."}},
	{.has_pending_message = true}));

	// Append the 5th message.
	// The role changes to "tool": the expected prefill closes the model turn,
	// opens a user turn, and starts a tool_outputs fence that is left open.
	absl::string_view expected_prefill_5 = R"(<end_of_turn>
	<start_of_turn>user
	```tool_outputs
	{"location": "Paris", "temperature": 20, "unit": "C", "weather": "Sunny"})";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_5)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	ASSERT_OK(
	conversation->SendMessage(JsonMessage{{"role", "tool"},
	{"content",
	{
	{"type", "tool_response"},
	{"tool_response",
	{
	{"location", "Paris"},
	{"temperature", 20},
	{"unit", "C"},
	{"weather", "Sunny"},
	}},
	}}},
	{.has_pending_message = true}));

	// Append the 6th message.
	// Second tool response, sent with has_pending_message = false: the expected
	// prefill adds the response, closes the fence and the turn, and opens the
	// model turn; RunDecode() is expected exactly once.
	absl::string_view expected_prefill_6 =
	R"({"location": "London", "temperature": 15, "unit": "C", "weather": "Cloudy"}
	```<end_of_turn>
	<start_of_turn>model
	)";
	EXPECT_CALL(*mock_session_ptr,
	RunPrefill(testing::ElementsAre(
	testing::VariantWith<InputText>(testing::Property(
	&InputText::GetRawTextString, expected_prefill_6)))))
	.Times(1)
	.WillOnce(testing::Return(absl::OkStatus()));
	EXPECT_CALL(*mock_session_ptr, RunDecode(testing::_))
	.WillOnce(
	testing::Return(Responses(TaskState::kProcessing, {"I am good."})));
	ASSERT_OK(
	conversation->SendMessage(JsonMessage{{"role", "tool"},
	{"content",
	{
	{"type", "tool_response"},
	{"tool_response",
	{
	{"location", "London"},
	{"temperature", 15},
	{"unit", "C"},
	{"weather", "Cloudy"},
	}},
	}}},
	{.has_pending_message = false}));
	}

	} // namespace
	} // namespace litert::lm