Spaces:

FINAL-Bench
/

LiteRT-LM

Running

App Files Files Community

LiteRT-LM/runtime/framework/resource_management/execution_manager_test.cc

SeaWolf-AI

Upload full LiteRT-LM codebase

5f923cd verified 5 days ago

raw

history blame contribute delete

40 kB

	// Copyright 2025 The ODML Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "runtime/framework/resource_management/execution_manager.h"

	#include <atomic>
	#include <memory>
	#include <optional>
	#include <string>
	#include <utility>
	#include <vector>

	#include <gmock/gmock.h>
	#include <gtest/gtest.h>
	#include "absl/functional/any_invocable.h" // from @com_google_absl
	#include "absl/status/status.h" // from @com_google_absl
	#include "absl/status/statusor.h" // from @com_google_absl
	#include "absl/strings/str_cat.h" // from @com_google_absl
	#include "absl/strings/string_view.h" // from @com_google_absl
	#include "absl/time/time.h" // from @com_google_absl
	#include "litert/cc/litert_tensor_buffer.h" // from @litert
	#include "runtime/components/constrained_decoding/fake_constraint.h"
	#include "runtime/components/model_resources.h"
	#include "runtime/components/tokenizer.h"
	#include "runtime/engine/engine_settings.h"
	#include "runtime/engine/io_types.h"
	#include "runtime/executor/audio_executor.h"
	#include "runtime/executor/audio_executor_settings.h"
	#include "runtime/executor/executor_settings_base.h"
	#include "runtime/executor/fake_llm_executor.h"
	#include "runtime/executor/llm_executor_io_types.h"
	#include "runtime/proto/token.pb.h"
	#include "runtime/util/status_macros.h" // IWYU pragma: keep
	#include "runtime/util/test_utils.h" // NOLINT

	namespace litert::lm {
	namespace {

	using ::testing::ElementsAre;
	using ::testing::Return;

	// gMock test double for the Tokenizer interface. Every overridden method is
	// a mock, so tests decide what each call returns via EXPECT_CALL / ON_CALL.
	class MockTokenizer : public Tokenizer {
	public:
	// Text -> token ids.
	MOCK_METHOD(absl::StatusOr<std::vector<int>>, TextToTokenIds,
	(absl::string_view text), (override));
	// Single token string -> its id.
	MOCK_METHOD(absl::StatusOr<int>, TokenToId, (absl::string_view token),
	(override));
	// Token ids -> text.
	MOCK_METHOD(absl::StatusOr<std::string>, TokenIdsToText,
	(const std::vector<int>& token_ids), (override));
	// Reports the tokenizer type.
	MOCK_METHOD(TokenizerType, GetTokenizerType, (), (const, override));
	// Returns the list of token strings.
	MOCK_METHOD(std::vector<std::string>, GetTokens, (), (const, override));
	};

	// Minimal AudioExecutor stand-in for tests: Encode() ignores the spectrogram
	// it is given and always succeeds with a value-initialized ExecutorAudioData.
	class FakeAudioExecutor : public AudioExecutor {
	 public:
	  absl::StatusOr<::litert::lm::ExecutorAudioData> Encode(
	      const litert::TensorBuffer& spectrogram_tensor) override {
	    using EncodeResult = absl::StatusOr<::litert::lm::ExecutorAudioData>;
	    return EncodeResult(::litert::lm::ExecutorAudioData());
	  }
	};

	class ExecutionManagerTest : public ::testing::Test {
	protected:
	void SetUp() override {
	tokenizer_ = std::make_unique<MockTokenizer>();
	EXPECT_CALL(*tokenizer_, TokenIdsToText(ElementsAre(0)))
	.WillRepeatedly(Return("0"));
	EXPECT_CALL(*tokenizer_, TokenIdsToText(ElementsAre(4)))
	.WillRepeatedly(Return("4"));
	EXPECT_CALL(*tokenizer_, TokenIdsToText(ElementsAre(5)))
	.WillRepeatedly(Return("5"));
	EXPECT_CALL(*tokenizer_, TokenIdsToText(ElementsAre(6)))
	.WillRepeatedly(Return("6"));
	}

	absl::StatusOr<SessionConfig> CreateDefaultSessionConfig(
	bool use_external_sampler = false) {
	ASSIGN_OR_RETURN(auto model_assets,
	ModelAssets::Create("test_model_path_1"));
	ASSIGN_OR_RETURN(auto settings,
	EngineSettings::CreateDefault(model_assets));

	proto::LlmMetadata llm_metadata;
	llm_metadata.mutable_stop_tokens()
	->Add()
	->mutable_token_ids()
	->mutable_ids()
	->Add(0);
	llm_metadata.mutable_stop_tokens()
	->Add()
	->mutable_token_ids()
	->mutable_ids()
	->Add(6);
	llm_metadata.mutable_llm_model_type()->mutable_gemma3n();
	EXPECT_OK(settings.MaybeUpdateAndValidate(tokenizer_.get(), &llm_metadata));
	SessionConfig session_config = SessionConfig::CreateDefault();
	EXPECT_OK(session_config.MaybeUpdateAndValidate(settings));
	session_config.SetUseExternalSampler(use_external_sampler);
	model_resources_ = std::unique_ptr<ModelResources>();
	return session_config;
	};

	void CreateExecutionManager(
	std::unique_ptr<FakeLlmExecutor> fake_llm_executor,
	std::unique_ptr<AudioExecutorSettings> audio_executor_settings = nullptr,
	std::unique_ptr<AudioExecutor> audio_executor = nullptr) {
	// The objects are moved to execution_manager_ so we can't access them
	// after creation.
	ASSERT_OK_AND_ASSIGN(
	execution_manager_,
	ExecutionManager::Create(
	/tokenizer=/tokenizer_.get(),
	/model_resources=/model_resources_.get(),
	/llm_executor=/std::move(fake_llm_executor),
	/vision_executor_settings=/nullptr,
	std::move(audio_executor_settings),
	/litert_env=/nullptr, std::move(audio_executor)));
	}

	std::unique_ptr<FakeLlmExecutor> CreateDefaultFakeLlmExecutor(
	std::optional<std::vector<std::vector<int>>> override_prefill_tokens =
	std::nullopt) {
	auto prefill_tokens = std::vector<std::vector<int>>{{1, 2, 3}};
	if (override_prefill_tokens.has_value()) {
	prefill_tokens = *override_prefill_tokens;
	}
	auto decode_tokens = std::vector<std::vector<int>>{{4}, {5}, {6}};
	return std::make_unique<FakeLlmExecutor>(
	/vocab_size=/10,
	/prefill_tokens=/std::move(prefill_tokens),
	/decode_tokens=/std::move(decode_tokens));
	}

	std::unique_ptr<MockTokenizer> tokenizer_;

	std::unique_ptr<ModelResources> model_resources_;

	std::unique_ptr<ExecutionManager> execution_manager_;
	};

	// A session registered together with BenchmarkInfo must hand back a
	// non-null mutable BenchmarkInfo pointer.
	TEST_F(ExecutionManagerTest, CanGetMutableBenchmarkInfo) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());
	  ASSERT_OK_AND_ASSIGN(auto config, CreateDefaultSessionConfig());

	  // Register with benchmark info built from default BenchmarkParams.
	  ASSERT_OK_AND_ASSIGN(
	      const SessionId id,
	      execution_manager_->RegisterNewSession(
	          config,
	          std::make_optional<BenchmarkInfo>(proto::BenchmarkParams())));

	  ASSERT_OK_AND_ASSIGN(BenchmarkInfo* info,
	                       execution_manager_->GetMutableBenchmarkInfo(id));
	  EXPECT_NE(info, nullptr);
	}

	// A session registered without BenchmarkInfo must report kInvalidArgument
	// when its mutable BenchmarkInfo is requested.
	TEST_F(ExecutionManagerTest, GetMutableBenchmarkInfoFailsIfNoBenchmarkInfo) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());
	  ASSERT_OK_AND_ASSIGN(auto config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId id,
	                       execution_manager_->RegisterNewSession(config));

	  EXPECT_THAT(execution_manager_->GetMutableBenchmarkInfo(id),
	              testing::status::StatusIs(absl::StatusCode::kInvalidArgument));
	}

	// Runs one prefill task (text followed by an audio-end marker) and checks
	// that the callback sees the Created -> Queued -> Processing -> Done states.
	TEST_F(ExecutionManagerTest, AddPrefillTask) {
	  // NOTE(review): the trailing -4 is presumably the token the executor sees
	  // for InputAudioEnd — confirm against the prefill implementation.
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor({{{1, 2, 3, -4}}}));
	  ASSERT_OK_AND_ASSIGN(auto config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId id,
	                       execution_manager_->RegisterNewSession(config));

	  // Every callback invocation must carry an OK response; record its state.
	  std::vector<TaskState> observed_states;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> on_update =
	      [&observed_states](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        observed_states.push_back(responses->GetTaskState());
	      };

	  std::vector<InputData> prefill_inputs;
	  ASSERT_OK_AND_ASSIGN(auto text_buffer,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  prefill_inputs.push_back(InputText(std::move(text_buffer)));
	  prefill_inputs.push_back(InputAudioEnd());

	  ASSERT_OK_AND_ASSIGN(const TaskId prefill_id,
	                       execution_manager_->GetNewTaskId());

	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      id, prefill_id, std::move(prefill_inputs), {},
	      std::make_shared<std::atomic<bool>>(false), std::move(on_update)));

	  EXPECT_OK(execution_manager_->WaitUntilDone(prefill_id, absl::Seconds(3)));

	  EXPECT_THAT(observed_states,
	              ElementsAre(TaskState::kCreated, TaskState::kQueued,
	                          TaskState::kProcessing, TaskState::kDone));
	}

	// Registers a session with the audio modality enabled (LLM and audio
	// executors both configured for GPU_ARTISAN) and verifies that a prefill
	// task on that session completes within the timeout.
	TEST_F(ExecutionManagerTest, AddPrefillTaskWithAudioModality) {
	  auto llm_executor = CreateDefaultFakeLlmExecutor();

	  ASSERT_OK_AND_ASSIGN(auto* llm_settings,
	                       llm_executor->GetMutableExecutorSettings());
	  EXPECT_OK(llm_settings->SetBackend(Backend::GPU_ARTISAN));

	  ASSERT_OK_AND_ASSIGN(auto assets, ModelAssets::Create("test_model_path_2"));
	  ASSERT_OK_AND_ASSIGN(
	      auto audio_executor_settings,
	      AudioExecutorSettings::CreateDefault(assets, 128, Backend::GPU_ARTISAN));

	  CreateExecutionManager(
	      std::move(llm_executor),
	      std::make_unique<AudioExecutorSettings>(
	          std::move(audio_executor_settings)),
	      std::make_unique<FakeAudioExecutor>());

	  ASSERT_OK_AND_ASSIGN(auto config, CreateDefaultSessionConfig());
	  config.SetAudioModalityEnabled(true);

	  // Trigger RegisterNewSession which previously acquired nested locks
	  ASSERT_OK_AND_ASSIGN(const SessionId id,
	                       execution_manager_->RegisterNewSession(config));

	  std::vector<InputData> prefill_inputs;
	  ASSERT_OK_AND_ASSIGN(auto text_buffer,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  prefill_inputs.push_back(InputText(std::move(text_buffer)));
	  prefill_inputs.push_back(InputAudioEnd());

	  ASSERT_OK_AND_ASSIGN(const TaskId prefill_id,
	                       execution_manager_->GetNewTaskId());
	  // No callback is supplied; only completion is checked.
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      id, prefill_id, std::move(prefill_inputs), {},
	      std::make_shared<std::atomic<bool>>(false), nullptr));

	  EXPECT_OK(execution_manager_->WaitUntilDone(prefill_id, absl::Seconds(3)));
	}

	// Feeds a prefill task an InputAudio built from an empty string and expects
	// the task to fail with kFailedPrecondition after reaching kProcessing.
	TEST_F(ExecutionManagerTest, AddPrefillTaskInvalidAudioInput) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());
	  ASSERT_OK_AND_ASSIGN(auto config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId id,
	                       execution_manager_->RegisterNewSession(config));

	  // OK responses contribute their reported state; an error response must be
	  // the expected precondition failure and is recorded as kFailed.
	  std::vector<TaskState> observed_states;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> on_update =
	      [&observed_states](absl::StatusOr<Responses> responses) {
	        if (responses.ok()) {
	          ASSERT_OK(responses);
	          observed_states.push_back(responses->GetTaskState());
	          return;
	        }
	        ASSERT_THAT(responses, testing::status::StatusIs(
	                                   absl::StatusCode::kFailedPrecondition));
	        ASSERT_THAT(responses.status().message(),
	                    testing::Eq("The audio is not a preprocessed tensor."));
	        observed_states.push_back(TaskState::kFailed);
	      };

	  std::vector<InputData> prefill_inputs;
	  ASSERT_OK_AND_ASSIGN(auto text_buffer,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  prefill_inputs.push_back(InputText(std::move(text_buffer)));
	  InputAudio raw_audio("");
	  prefill_inputs.push_back(std::move(raw_audio));

	  ASSERT_OK_AND_ASSIGN(const TaskId prefill_id,
	                       execution_manager_->GetNewTaskId());

	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      id, prefill_id, std::move(prefill_inputs), {},
	      std::make_shared<std::atomic<bool>>(false), std::move(on_update)));

	  EXPECT_OK(execution_manager_->WaitUntilDone(prefill_id, absl::Seconds(3)));

	  EXPECT_THAT(observed_states,
	              ElementsAre(TaskState::kCreated, TaskState::kQueued,
	                          TaskState::kProcessing, TaskState::kFailed));
	}

	// Feeds a prefill task an InputImage built from an empty string and expects
	// the task to fail with kFailedPrecondition after reaching kProcessing.
	TEST_F(ExecutionManagerTest, AddPrefillTaskInvalidImageInput) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());
	  ASSERT_OK_AND_ASSIGN(auto config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId id,
	                       execution_manager_->RegisterNewSession(config));

	  // OK responses contribute their reported state; an error response must be
	  // the expected precondition failure and is recorded as kFailed.
	  std::vector<TaskState> observed_states;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> on_update =
	      [&observed_states](absl::StatusOr<Responses> responses) {
	        if (responses.ok()) {
	          ASSERT_OK(responses);
	          observed_states.push_back(responses->GetTaskState());
	          return;
	        }
	        ASSERT_THAT(responses, testing::status::StatusIs(
	                                   absl::StatusCode::kFailedPrecondition));
	        ASSERT_THAT(responses.status().message(),
	                    testing::Eq("Image tensor or tensor map is null in "
	                                "preprocessed_contents."));
	        observed_states.push_back(TaskState::kFailed);
	      };

	  std::vector<InputData> prefill_inputs;
	  ASSERT_OK_AND_ASSIGN(auto text_buffer,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  prefill_inputs.push_back(InputText(std::move(text_buffer)));
	  InputImage raw_image("");
	  prefill_inputs.push_back(std::move(raw_image));

	  ASSERT_OK_AND_ASSIGN(const TaskId prefill_id,
	                       execution_manager_->GetNewTaskId());

	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      id, prefill_id, std::move(prefill_inputs), {},
	      std::make_shared<std::atomic<bool>>(false), std::move(on_update)));

	  EXPECT_OK(execution_manager_->WaitUntilDone(prefill_id, absl::Seconds(3)));

	  EXPECT_THAT(observed_states,
	              ElementsAre(TaskState::kCreated, TaskState::kQueued,
	                          TaskState::kProcessing, TaskState::kFailed));
	}

	// Prefills {1, 2, 3}, then runs a decode task with the internal sampler and
	// checks both the task-state sequence and the decoded texts seen by the
	// callback.
	TEST_F(ExecutionManagerTest, AddDecodeTaskWithInternalSampler) {
	  // The default execution manager is using the internal sampler.
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  // Records every reported state and the first text of each non-empty
	  // response.
	  std::vector<TaskState> task_states;
	  std::vector<std::string> responses_texts;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback =
	      [&task_states, &responses_texts](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        task_states.push_back(responses->GetTaskState());
	        if (!responses->GetTexts().empty()) {
	          responses_texts.push_back(responses->GetTexts()[0]);
	        }
	      };

	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId prefill_task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, prefill_task_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/[](absl::StatusOr<Responses> responses) {}));
	  // Prefill must be finished before decode is queued, because the decode
	  // task below declares no dependencies.
	  ASSERT_OK(
	      execution_manager_->WaitUntilDone(prefill_task_id, absl::Seconds(3)));

	  ASSERT_OK_AND_ASSIGN(const TaskId decode_task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, decode_task_id,
	      /*dependency_task_ids=*/{},
	      /*constraint=*/nullptr,
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      std::move(callback)));

	  EXPECT_OK(
	      execution_manager_->WaitUntilDone(decode_task_id, absl::Seconds(3)));

	  EXPECT_THAT(task_states,
	              ElementsAre(TaskState::kCreated, TaskState::kQueued,
	                          TaskState::kProcessing, TaskState::kProcessing,
	                          TaskState::kProcessing, TaskState::kDone));

	  // The fake executor is scripted with decode tokens 4, 5, 6; 6 is one of
	  // the stop tokens registered by CreateDefaultSessionConfig, so only "4"
	  // and "5" are expected as text.
	  EXPECT_THAT(responses_texts, ElementsAre("4", "5"));
	}

	// Same scenario as the internal-sampler decode test, but the session is
	// configured with use_external_sampler=true and the fake executor is
	// scripted with an additional prefill batch {6}.
	TEST_F(ExecutionManagerTest, AddDecodeTaskWithExternalSampler) {
	  std::vector<std::vector<int>> prefill_tokens = {{1, 2, 3}, {6}};
	  std::vector<std::vector<int>> decode_tokens = {{4}, {5}, {6}};

	  CreateExecutionManager(std::make_unique<FakeLlmExecutor>(
	      /*vocab_size=*/10,
	      /*prefill_tokens=*/std::move(prefill_tokens),
	      /*decode_tokens=*/std::move(decode_tokens)));

	  ASSERT_OK_AND_ASSIGN(auto session_config,
	                       CreateDefaultSessionConfig(
	                           /*use_external_sampler=*/true));
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  // Records every reported state and the first text of each non-empty
	  // response.
	  std::vector<TaskState> task_states;
	  std::vector<std::string> responses_texts;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback =
	      [&task_states, &responses_texts](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        task_states.push_back(responses->GetTaskState());
	        if (!responses->GetTexts().empty()) {
	          responses_texts.push_back(responses->GetTexts()[0]);
	        }
	      };

	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId prefill_task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, prefill_task_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/[](absl::StatusOr<Responses> responses) {}));
	  // Prefill must be finished before decode is queued, because the decode
	  // task below declares no dependencies.
	  ASSERT_OK(
	      execution_manager_->WaitUntilDone(prefill_task_id, absl::Seconds(3)));

	  ASSERT_OK_AND_ASSIGN(const TaskId decode_task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, decode_task_id,
	      /*dependency_task_ids=*/{},
	      /*constraint=*/nullptr,
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      std::move(callback)));

	  EXPECT_OK(
	      execution_manager_->WaitUntilDone(decode_task_id, absl::Seconds(3)));

	  EXPECT_THAT(task_states,
	              ElementsAre(TaskState::kCreated, TaskState::kQueued,
	                          TaskState::kProcessing, TaskState::kProcessing,
	                          TaskState::kProcessing, TaskState::kDone));

	  EXPECT_THAT(responses_texts, ElementsAre("4", "5"));
	}

	// Queues a prefill task A and a decode task B that depends on A without
	// waiting in between, then checks that both finish.
	TEST_F(ExecutionManagerTest, CreateAndRunDependentTasks) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId task_a_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_a_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));

	  ASSERT_OK_AND_ASSIGN(const TaskId task_b_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, task_b_id,
	      /*dependency_task_ids=*/{task_a_id},
	      /*constraint=*/nullptr,
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));

	  // Waiting on B first exercises the dependency: B can only be done once A
	  // has run.
	  EXPECT_OK(execution_manager_->WaitUntilDone(task_b_id, absl::Seconds(1)));
	  EXPECT_OK(execution_manager_->WaitUntilDone(task_a_id, absl::Seconds(1)));
	}

	// Adding a prefill task whose dependency id (12345) was never issued must be
	// rejected with kInvalidArgument.
	TEST_F(ExecutionManagerTest, CreateTaskWithInvalidDependency) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  std::vector<InputData> inputs;
	  inputs.push_back(InputText("test"));
	  ASSERT_OK_AND_ASSIGN(const TaskId task_id,
	                       execution_manager_->GetNewTaskId());
	  auto add_task_status = execution_manager_->AddPrefillTask(
	      session_id, task_id, std::move(inputs),
	      /*dependency_task_ids=*/{12345},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr);
	  EXPECT_FALSE(add_task_status.ok());
	  EXPECT_EQ(add_task_status.code(), absl::StatusCode::kInvalidArgument);
	}

	// After one valid prefill task has completed, adding another task that
	// depends on the unknown id 99999 must fail with kInvalidArgument and a
	// message naming the missing dependency.
	TEST_F(ExecutionManagerTest, CreateTaskWithInvalidDependencyId) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  // Add a valid task.
	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId task_a_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_a_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));
	  EXPECT_OK(execution_manager_->WaitUntilDone(task_a_id, absl::Seconds(1)));

	  // Try to add a task with an invalid dependency.
	  // NOTE(review): 99999 is used both as the new task's own id and as its
	  // dependency id, and it did not come from GetNewTaskId(). Confirm this is
	  // intentional rather than a fresh task id depending on 99999.
	  std::vector<InputData> inputs_b;
	  ASSERT_OK_AND_ASSIGN(auto input_text_b,
	                       tokenizer_->TokenIdsToTensorBuffer({4, 5, 6}));
	  inputs_b.push_back(InputText(std::move(input_text_b)));
	  const TaskId invalid_task_id = 99999;
	  auto task_status = execution_manager_->AddPrefillTask(
	      session_id, invalid_task_id, std::move(inputs_b),
	      /*dependency_task_ids=*/{invalid_task_id},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr);
	  EXPECT_FALSE(task_status.ok());
	  EXPECT_EQ(task_status.code(), absl::StatusCode::kInvalidArgument);
	  EXPECT_THAT(task_status.message(),
	              testing::HasSubstr("Dependency task 99999 not found"));
	}

	// WaitUntilDone must return DeadlineExceeded (with the exact message below)
	// when the task is still running after the given timeout.
	TEST_F(ExecutionManagerTest, WaitUntilTaskDoneTimeout) {
	  auto prefill_tokens = std::vector<std::vector<int>>{};
	  auto decode_tokens = std::vector<std::vector<int>>{{4}, {5}, {6}};
	  auto fake_llm_executor = std::make_unique<FakeLlmExecutor>(
	      /*vocab_size=*/10,
	      /*prefill_tokens=*/std::move(prefill_tokens),
	      /*decode_tokens=*/std::move(decode_tokens));

	  // Inject a long delay to simulate a timeout.
	  fake_llm_executor->SetDecodeDelay(absl::Seconds(0.5));

	  CreateExecutionManager(std::move(fake_llm_executor));

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  ASSERT_OK_AND_ASSIGN(const TaskId task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, task_id,
	      /*dependency_task_ids=*/{},
	      /*constraint=*/nullptr,
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));

	  // 100ms is shorter than the injected 0.5s decode delay.
	  EXPECT_EQ(
	      execution_manager_->WaitUntilDone(task_id, absl::Milliseconds(100)),
	      absl::DeadlineExceededError(absl::StrCat(
	          "Task ", task_id, " did not complete within the timeout of 100ms.")));

	  // Wait for the task to actually finish to avoid use after free.
	  EXPECT_OK(execution_manager_->WaitUntilDone(task_id, absl::Seconds(3)));
	}

	// WaitUntilAllDone must return kDeadlineExceeded when a queued task is still
	// running after the given timeout.
	TEST_F(ExecutionManagerTest, WaitUntilAllDoneTimeout) {
	  auto prefill_tokens = std::vector<std::vector<int>>{};
	  auto decode_tokens = std::vector<std::vector<int>>{{4}, {5}, {6}};
	  auto fake_llm_executor = std::make_unique<FakeLlmExecutor>(
	      /*vocab_size=*/10,
	      /*prefill_tokens=*/std::move(prefill_tokens),
	      /*decode_tokens=*/std::move(decode_tokens));

	  // Inject a long delay to simulate a timeout.
	  fake_llm_executor->SetDecodeDelay(absl::Seconds(0.5));

	  CreateExecutionManager(std::move(fake_llm_executor));

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  ASSERT_OK_AND_ASSIGN(const TaskId task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, task_id,
	      /*dependency_task_ids=*/{},
	      /*constraint=*/nullptr,
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));

	  // 100ms is shorter than the injected 0.5s decode delay.
	  EXPECT_EQ(
	      execution_manager_->WaitUntilAllDone(absl::Milliseconds(100)).code(),
	      absl::StatusCode::kDeadlineExceeded);

	  // Wait for the task to actually finish to avoid use after free.
	  EXPECT_OK(execution_manager_->WaitUntilDone(task_id, absl::Seconds(3)));
	}

	// When the executor's prefill fails, the error must be delivered to the
	// task callback while WaitUntilDone itself still returns OK.
	TEST_F(ExecutionManagerTest, TaskReturnsError) {
	  auto prefill_tokens = std::vector<std::vector<int>>{{1, 2, 3}};
	  auto decode_tokens = std::vector<std::vector<int>>{};
	  auto fake_llm_executor = std::make_unique<FakeLlmExecutor>(
	      /*vocab_size=*/10,
	      /*prefill_tokens=*/std::move(prefill_tokens),
	      /*decode_tokens=*/std::move(decode_tokens));

	  // Inject an error.
	  fake_llm_executor->SetPrefillStatus(absl::InternalError("Executor failed"));

	  CreateExecutionManager(std::move(fake_llm_executor));

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  // Holds the last non-OK status handed to the callback.
	  absl::Status final_status = absl::OkStatus();
	  ASSERT_OK_AND_ASSIGN(const TaskId task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_id, std::move(inputs), /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      [&](absl::StatusOr<Responses> responses) {
	        if (!responses.ok()) {
	          final_status = responses.status();
	        }
	      }));

	  EXPECT_OK(execution_manager_->WaitUntilDone(task_id, absl::Seconds(1)));
	  EXPECT_EQ(final_status, absl::InternalError("Executor failed"));
	}

	// A decode task that depends on an already-failed prefill task must be
	// reported to its callback as an OK response in state kDependentTaskFailed.
	TEST_F(ExecutionManagerTest, CreateDependentTaskOnFailedTask) {
	  auto prefill_tokens = std::vector<std::vector<int>>{{1, 2, 3}};
	  auto decode_tokens = std::vector<std::vector<int>>{{4}, {5}, {6}};
	  auto fake_llm_executor = std::make_unique<FakeLlmExecutor>(
	      /*vocab_size=*/10,
	      /*prefill_tokens=*/std::move(prefill_tokens),
	      /*decode_tokens=*/std::move(decode_tokens));

	  // Inject an error.
	  fake_llm_executor->SetPrefillStatus(absl::InternalError("Executor failed"));

	  CreateExecutionManager(std::move(fake_llm_executor));

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  // Task A: prefill that fails with the injected executor error.
	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  absl::Status task_a_status = absl::OkStatus();
	  ASSERT_OK_AND_ASSIGN(const TaskId task_a_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_a_id, std::move(inputs), /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      [&](absl::StatusOr<Responses> responses) {
	        task_a_status = responses.status();
	      }));

	  EXPECT_OK(execution_manager_->WaitUntilDone(task_a_id, absl::Seconds(1)));
	  EXPECT_EQ(task_a_status, absl::InternalError("Executor failed"));

	  // Task B: decode that depends on the failed task A.
	  absl::Status task_b_status = absl::OkStatus();
	  std::vector<TaskState> task_b_states;
	  ASSERT_OK_AND_ASSIGN(const TaskId task_b_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, task_b_id,
	      /*dependency_task_ids=*/{task_a_id},
	      /*constraint=*/nullptr,
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      [&](absl::StatusOr<Responses> responses) {
	        task_b_status = responses.status();
	        if (responses.ok()) {
	          task_b_states.push_back(responses->GetTaskState());
	        }
	      }));

	  EXPECT_OK(execution_manager_->WaitUntilDone(task_b_id, absl::Seconds(1)));
	  EXPECT_EQ(task_b_status, absl::OkStatus());
	  EXPECT_THAT(task_b_states, ElementsAre(TaskState::kDependentTaskFailed));
	}

	// Runs prefill followed by a dependent decode task that carries a
	// FakeConstraint, using the internal sampler, and checks the decoded text.
	TEST_F(ExecutionManagerTest, AddDecodeTaskWithConstraintWithInternalSampler) {
	  // The default execution manager is using the internal sampler.
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());

	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId task_a_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_a_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));

	  ASSERT_OK_AND_ASSIGN(const TaskId task_b_id,
	                       execution_manager_->GetNewTaskId());
	  // Fake constraint built from the expected token ids {4, 0}.
	  // NOTE(review): presumably this forces decoding to emit 4 and then the
	  // stop token 0, which is why only "4" is expected below — confirm against
	  // FakeConstraint.
	  std::vector<int> expected_token_ids = {4, 0};
	  auto constraint = FakeConstraint(expected_token_ids, /*vocabulary_size=*/10);
	  auto decode_config = DecodeConfig::CreateDefault();
	  decode_config.SetConstraint(&constraint);
	  // Collects the first text of every non-empty response.
	  std::vector<std::string> response_texts;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback =
	      [&response_texts](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        if (!responses->GetTexts().empty()) {
	          response_texts.push_back(responses->GetTexts()[0]);
	        }
	      };

	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, task_b_id,
	      /*dependency_task_ids=*/{task_a_id}, decode_config.GetConstraint(),
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      std::move(callback)));

	  EXPECT_OK(execution_manager_->WaitUntilDone(task_b_id, absl::Seconds(3)));

	  EXPECT_THAT(response_texts, ElementsAre("4"));
	}

	// Runs prefill followed by a dependent decode task that carries a
	// FakeConstraint, with the session configured for the external sampler, and
	// checks the decoded text.
	TEST_F(ExecutionManagerTest, AddDecodeTaskWithConstraintWithExternalSampler) {
	  auto prefill_tokens = std::vector<std::vector<int>>{{1, 2, 3}, {0}};
	  auto decode_tokens = std::vector<std::vector<int>>{{4}, {5}, {6}};

	  CreateExecutionManager(std::make_unique<FakeLlmExecutor>(
	      /*vocab_size=*/10,
	      /*prefill_tokens=*/std::move(prefill_tokens),
	      /*decode_tokens=*/std::move(decode_tokens)));

	  ASSERT_OK_AND_ASSIGN(auto session_config,
	                       CreateDefaultSessionConfig(
	                           /*use_external_sampler=*/true));
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId task_a_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_a_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));

	  ASSERT_OK_AND_ASSIGN(const TaskId task_b_id,
	                       execution_manager_->GetNewTaskId());
	  // Fake constraint built from the expected token ids {4, 0}.
	  // NOTE(review): presumably this forces decoding to emit 4 and then the
	  // stop token 0, which is why only "4" is expected below — confirm against
	  // FakeConstraint.
	  std::vector<int> expected_token_ids = {4, 0};
	  auto constraint = FakeConstraint(expected_token_ids, /*vocabulary_size=*/10);
	  auto decode_config = DecodeConfig::CreateDefault();
	  decode_config.SetConstraint(&constraint);
	  // Collects the first text of every non-empty response.
	  std::vector<std::string> response_texts;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback =
	      [&response_texts](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        if (!responses->GetTexts().empty()) {
	          response_texts.push_back(responses->GetTexts()[0]);
	        }
	      };

	  ASSERT_OK(execution_manager_->AddDecodeTask(
	      session_id, task_b_id,
	      /*dependency_task_ids=*/{task_a_id}, decode_config.GetConstraint(),
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      std::move(callback)));

	  EXPECT_OK(execution_manager_->WaitUntilDone(task_b_id, absl::Seconds(3)));

	  EXPECT_THAT(response_texts, ElementsAre("4"));
	}

	// Prefills {1, 2, 3}, then scores the target text "45" and checks both the
	// task-state sequence and the resulting score.
	TEST_F(ExecutionManagerTest, AddTextScoringTask) {
	  // gMock requires expectations to be set before the mock is exercised, so
	  // the tokenization of the scoring target is registered up front, before
	  // any task runs against the tokenizer.
	  EXPECT_CALL(*tokenizer_, TextToTokenIds("45"))
	      .WillOnce(Return(std::vector<int>({4, 5})));

	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());
	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));

	  // Records every reported state and the first score of each non-empty
	  // response.
	  std::vector<TaskState> task_states;
	  std::vector<float> scores;
	  absl::AnyInvocable<void(absl::StatusOr<Responses>)> callback =
	      [&task_states, &scores](absl::StatusOr<Responses> responses) {
	        ASSERT_OK(responses);
	        task_states.push_back(responses->GetTaskState());
	        if (!responses->GetScores().empty()) {
	          scores.push_back(responses->GetScores()[0]);
	        }
	      };

	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId prefill_task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, prefill_task_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/[](absl::StatusOr<Responses> responses) {}));
	  // Prefill must be finished before scoring is queued, because the scoring
	  // task below declares no dependencies.
	  ASSERT_OK(
	      execution_manager_->WaitUntilDone(prefill_task_id, absl::Seconds(3)));

	  ASSERT_OK_AND_ASSIGN(const TaskId scoring_task_id,
	                       execution_manager_->GetNewTaskId());
	  const std::vector<absl::string_view> target_text = {"45"};

	  ASSERT_OK(execution_manager_->AddTextScoringTask(
	      session_id, scoring_task_id,
	      /*dep_tasks=*/{}, target_text,
	      /*store_token_lengths=*/false,
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      std::move(callback)));

	  EXPECT_OK(
	      execution_manager_->WaitUntilDone(scoring_task_id, absl::Seconds(3)));

	  EXPECT_THAT(task_states,
	              ElementsAre(TaskState::kCreated, TaskState::kQueued,
	                          TaskState::kProcessing, TaskState::kDone));

	  // The FakeLlmExecutor is set up to expect tokens 4, 5, 6.
	  // The target text "45" corresponds to tokens 4, 5.
	  // The fake executor will produce logits that give prob 1 to the next
	  // expected token. So for the first token '4', the expected is '4', prob is 1,
	  // log-prob is 0. For the second token '5', the expected is '5', prob is 1,
	  // log-prob is 0. Total score is 0.
	  ASSERT_EQ(scores.size(), 1u);
	  EXPECT_FLOAT_EQ(scores[0], 0.0f);
	}

	// Verifies that GetCurrentStep reports 0 for a freshly registered session and
	// 3 after a prefill of three tokens has completed.
	TEST_F(ExecutionManagerTest, GetCurrentStep) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());
	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));
	  ASSERT_OK_AND_ASSIGN(auto session_info,
	                       execution_manager_->GetSessionInfo(session_id));

	  // Initially step should be 0.
	  ASSERT_OK_AND_ASSIGN(int step1,
	                       execution_manager_->GetCurrentStep(*session_info));
	  EXPECT_EQ(step1, 0);

	  // Run a prefill task.
	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));
	  // Fatal on failure: the step check below is only meaningful once the
	  // prefill has actually finished.
	  ASSERT_OK(execution_manager_->WaitUntilDone(task_id, absl::Seconds(3)));

	  // After prefill, step should be updated (3 tokens).
	  ASSERT_OK_AND_ASSIGN(int step2,
	                       execution_manager_->GetCurrentStep(*session_info));
	  EXPECT_EQ(step2, 3);
	}

	// Verifies that SetCurrentStep can rewind the session step to a smaller value
	// and rejects a value larger than the current step with kInvalidArgument.
	TEST_F(ExecutionManagerTest, SetCurrentStep) {
	  CreateExecutionManager(CreateDefaultFakeLlmExecutor());
	  ASSERT_OK_AND_ASSIGN(auto session_config, CreateDefaultSessionConfig());
	  ASSERT_OK_AND_ASSIGN(const SessionId session_id,
	                       execution_manager_->RegisterNewSession(session_config));
	  ASSERT_OK_AND_ASSIGN(auto session_info,
	                       execution_manager_->GetSessionInfo(session_id));

	  // Run a prefill task to increase step to 3.
	  std::vector<InputData> inputs;
	  ASSERT_OK_AND_ASSIGN(auto input_text,
	                       tokenizer_->TokenIdsToTensorBuffer({1, 2, 3}));
	  inputs.push_back(InputText(std::move(input_text)));
	  ASSERT_OK_AND_ASSIGN(const TaskId task_id,
	                       execution_manager_->GetNewTaskId());
	  ASSERT_OK(execution_manager_->AddPrefillTask(
	      session_id, task_id, std::move(inputs),
	      /*dependency_task_ids=*/{},
	      /*cancelled=*/std::make_shared<std::atomic<bool>>(false),
	      /*callback=*/nullptr));
	  // Fatal on failure: the step is read and modified below, which must not
	  // happen while the prefill may still be in flight.
	  ASSERT_OK(execution_manager_->WaitUntilDone(task_id, absl::Seconds(3)));

	  // Verify current step is 3.
	  ASSERT_OK_AND_ASSIGN(int step1,
	                       execution_manager_->GetCurrentStep(*session_info));
	  EXPECT_EQ(step1, 3);

	  // Set current step to 1.
	  EXPECT_OK(execution_manager_->SetCurrentStep(*session_info, 1));

	  // Verify current step is now 1.
	  ASSERT_OK_AND_ASSIGN(int step2,
	                       execution_manager_->GetCurrentStep(*session_info));
	  EXPECT_EQ(step2, 1);

	  // Try to set current step to 5 (greater than current step 1).
	  EXPECT_THAT(execution_manager_->SetCurrentStep(*session_info, 5),
	              testing::status::StatusIs(absl::StatusCode::kInvalidArgument));
	}

	} // namespace
	} // namespace litert::lm