Spaces:

FINAL-Bench
/

LiteRT-LM

Running

File size: 35,301 Bytes

5f923cd

// Copyright 2025 The ODML Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "runtime/framework/resource_management/resource_manager.h"

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "absl/base/nullability.h"  // from @com_google_absl
#include "absl/container/flat_hash_map.h"  // from @com_google_absl
#include "absl/status/status.h"  // from @com_google_absl
#include "absl/status/statusor.h"  // from @com_google_absl
#include "absl/strings/str_cat.h"  // from @com_google_absl
#include "absl/strings/string_view.h"  // from @com_google_absl
#include "absl/synchronization/mutex.h"  // from @com_google_absl
#include "absl/types/span.h"  // from @com_google_absl
#include "litert/cc/litert_tensor_buffer.h"  // from @litert
#include "runtime/components/model_resources.h"
#include "runtime/engine/engine_settings.h"
#include "runtime/engine/io_types.h"
#include "runtime/executor/audio_executor.h"
#include "runtime/executor/audio_executor_settings.h"
#include "runtime/executor/executor_settings_base.h"
#include "runtime/executor/llm_executor.h"
#include "runtime/executor/llm_executor_io_types.h"
#include "runtime/executor/llm_executor_processed_tokens.h"
#include "runtime/executor/llm_executor_settings.h"
#include "runtime/executor/vision_executor.h"
#include "runtime/executor/vision_executor_settings.h"
#include "runtime/framework/resource_management/context_handler/context_handler.h"
#include "runtime/framework/resource_management/utils/movable_mutex_lock.h"
#include "runtime/framework/resource_management/utils/resource_manager_utils.h"
#include "runtime/util/convert_tensor_buffer.h"
#include "runtime/util/status_macros.h"  // IWYU pragma: keep

namespace litert::lm {
namespace {

// Saves the current processed context within the llm_executor to the previous
// handler's shared_processed_context, and link the current handler to an
// empty processed_context, which represents the current handler's processed
// context is already loaded in the llm_executor. Notice that the
// context_handler is assumed to be loaded in the llm_executor via the
// AcquireWithContext method, meaning the context_handler should not own any
// RuntimeState, RuntimeConfig and actual ProcessedContext within it, as they
// are all owned by the llm_executor at this point.
absl::Status SaveProcessedContextAndSeparateLoadedHandler(
    std::shared_ptr<ContextHandler> context_handler,
    std::shared_ptr<LlmExecutor> llm_executor) {
  RET_CHECK_EQ(
      context_handler->HasRuntimeConfig() ||
          context_handler->HasRuntimeState() ||
          context_handler->shared_processed_context()->HasProcessedContext(),
      false)
      << "The context_handler "
         "should not own any RuntimeState, RuntimeConfig and actual "
         "ProcessedContext within it, as they should all be owned by the "
         "llm_executor when calling "
         "SaveProcessedContextAndSeparateLoadedHandler.";
  ASSIGN_OR_RETURN(auto llm_context, llm_executor->CloneContext());
  ASSIGN_OR_RETURN(auto current_processed_context,
                   llm_context->RetrieveProcessedContext());
  RETURN_IF_ERROR(
      context_handler->shared_processed_context()->SetProcessedContext(
          std::move(current_processed_context)));

  auto new_shared_processed_context =
      std::make_shared<ContextHandler::SharedProcessedContext>(nullptr);
  RETURN_IF_ERROR(context_handler->UpdateSharedProcessedContext(
      new_shared_processed_context));
  return absl::OkStatus();
}

}  // namespace

class LockedVisionExecutor : public VisionExecutor {
 public:
  LockedVisionExecutor(std::shared_ptr<VisionExecutor> vision_executor,
                       MovableMutexLock lock)
      : vision_executor_(std::move(vision_executor)), lock_(std::move(lock)) {}

  absl::StatusOr<ExecutorVisionData> Encode(
      const TensorBuffer& input_image_tensor) override {
    return vision_executor_->Encode(input_image_tensor);
  }

  absl::StatusOr<ExecutorVisionData> Encode(
      const absl::flat_hash_map<std::string, TensorBuffer>& input_tensors)
      override {
    return vision_executor_->Encode(std::move(input_tensors));
  }

  absl::StatusOr<std::vector<int>> GetExpectedInputDimension() const override {
    return vision_executor_->GetExpectedInputDimension();
  }

  absl::StatusOr<VisionExecutorProperties> GetVisionExecutorProperties()
      const override {
    return vision_executor_->GetVisionExecutorProperties();
  }

 private:
  std::shared_ptr<VisionExecutor> vision_executor_;
  // The mutex lock.
  MovableMutexLock lock_;
};

class LockedAudioExecutor : public AudioExecutor {
 public:
  LockedAudioExecutor(std::shared_ptr<AudioExecutor> audio_executor,
                      MovableMutexLock lock)
      : audio_executor_(std::move(audio_executor)), lock_(std::move(lock)) {}

  absl::StatusOr<ExecutorAudioData> Encode(
      const TensorBuffer& input_spectrogram_tensor) override {
    return audio_executor_->Encode(input_spectrogram_tensor);
  }

  absl::Status Reset() override { return audio_executor_->Reset(); }

  absl::StatusOr<AudioExecutorProperties> GetAudioExecutorProperties()
      const override {
    return audio_executor_->GetAudioExecutorProperties();
  }

  absl::StatusOr<std::unique_ptr<AudioContext>> CreateNewContext() override {
    return audio_executor_->CreateNewContext();
  }

  absl::StatusOr<std::unique_ptr<AudioContext>> CloneContext() override {
    return audio_executor_->CloneContext();
  }

  absl::StatusOr<std::unique_ptr<AudioContext>> CloneContext(
      const AudioContext& audio_context) override {
    return audio_executor_->CloneContext(audio_context);
  }

  absl::Status RestoreContext(
      std::unique_ptr<AudioContext> audio_context) override {
    return audio_executor_->RestoreContext(std::move(audio_context));
  }

 private:
  std::shared_ptr<AudioExecutor> audio_executor_;
  // The mutex lock.
  MovableMutexLock lock_;
};

// LockedLlmExecutor's behavior should be the same as LlmExecutor, but instead
// wraps the llm executor, the corresponding mutex lock, and some additional
// optimization logic before forwarding the request to the llm executor.
// The Optimization includes:
// 1(remove matching tokens): Update the input_ids and current_step by
// removing
//    the matching tokens from the processed tokens.
// 2(Copy on write): If the current handler is not the longest handler,
// retrieve
//    the processed_context for the previous handler, and update the current
//    handler's shared_processed_context.
// For more details, please refer to go/llm_resource_manager.
class LockedLlmExecutor : public LlmExecutor {
 public:
  // LockedLlmExecutor takes ownership of the mutex lock and holds the
  // shared_ptr to the executor.
  LockedLlmExecutor(std::shared_ptr<LlmExecutor> executor,
                    MovableMutexLock lock,
                    std::shared_ptr<ContextHandler> current_handler = nullptr)
      : current_handler_(current_handler),
        llm_executor_(std::move(executor)),
        lock_(std::move(lock)) {}

  absl::string_view ExecutorBackendName() const override {
    return llm_executor_->ExecutorBackendName();
  }

  absl::Status Prefill(const ExecutorInputs& inputs) override {
    return Prefill(inputs, ExecutorPrefillParams());
  }

  absl::Status Prefill(const ExecutorInputs& inputs,
                       const ExecutorPrefillParams& prefill_params) override {
    // If the executor is not acquired by any handler, forward the prefill
    // request to the executor directly.
    if (current_handler_ == nullptr) {
      return llm_executor_->Prefill(inputs, prefill_params);
    }
    // Check if the input token is 1 batch. Currently only support 1 batch per
    // prefill.
    ASSIGN_OR_RETURN(auto token_ids, inputs.GetTextTokenIdsPtr());
    LITERT_ASSIGN_OR_RETURN(auto token_ids_tensor_type,
                            token_ids->TensorType());
    RET_CHECK_EQ(token_ids_tensor_type.Layout().Dimensions()[0], 1);
    if (token_ids_tensor_type.Layout().Dimensions()[1] == 0) {
      return absl::OkStatus();
    }
    ASSIGN_OR_RETURN(int current_step, llm_executor_->GetCurrentStep());
    if (prefill_params.GetCurrentStep() != -1) {
      current_step = prefill_params.GetCurrentStep();
    }
    ASSIGN_OR_RETURN(const ProcessedTokens* processed_tokens,
                     llm_executor_->GetProcessedTokens());
    // If the current_step is pointing at the step right after the last
    // processed token, call executor directly, no optimization for the
    // input can be done.
    if (processed_tokens->TokenCount() == current_step) {
      return llm_executor_->Prefill(inputs, prefill_params);
    }

    LITERT_ASSIGN_OR_RETURN(
        auto input_ids_vec,
        CopyFromTensorBuffer<int32_t>(*(*inputs.GetTextTokenIdsPtr())));

    // If the current_step is not pointing at the step right after the last
    // processed token, update the input_ids and current_step by removing
    // the matching tokens, and then call llm_executor_->Prefill with the
    // optimized inputs and time step.

    // If the processed tokens size is larger than the current step, update
    // the input_ids and current_step by removing the matching tokens.
    RETURN_IF_ERROR(RemoveMatchingTokens(processed_tokens->GetCopyOfTokens()[0],
                                         &input_ids_vec, &current_step));
    // If the updated input_ids is empty, meaning all required prefill
    // tokens have been processed previously, just set the current step and
    // return.
    if (input_ids_vec.empty()) {
      RETURN_IF_ERROR(llm_executor_->SetCurrentStep(current_step));
      return absl::OkStatus();
    }

    // TODO: b/409401231 - Add unit tests for the new_inputs creation.
    LITERT_ASSIGN_OR_RETURN(
        auto new_inputs_token_ids,
        CopyToTensorBuffer(
            absl::MakeConstSpan(input_ids_vec.data(), input_ids_vec.size()),
            {1, static_cast<int>(input_ids_vec.size())}));
    std::optional<ExecutorVisionData> new_vision_data = std::nullopt;
    std::optional<ExecutorAudioData> new_audio_data = std::nullopt;
    if (inputs.GetVisionDataPtr().ok()) {
      new_vision_data = ExecutorVisionData();
      LITERT_ASSIGN_OR_RETURN(
          auto new_vision_embeddings,
          inputs.GetVisionEmbeddingsPtr().value()->Duplicate());
      new_vision_data->SetEmbeddings(std::move(new_vision_embeddings));
      if (inputs.GetVisionDataPtr().value()->GetPerLayerEmbeddingsPtr().ok()) {
        LITERT_ASSIGN_OR_RETURN(auto new_per_layer_embeddings,
                                inputs.GetVisionDataPtr()
                                    .value()
                                    ->GetPerLayerEmbeddingsPtr()
                                    .value()
                                    ->Duplicate());
        new_vision_data->SetPerLayerEmbeddings(
            std::move(new_per_layer_embeddings));
      }
    }
    if (inputs.GetAudioEmbeddingsPtr().ok()) {
      new_audio_data = ExecutorAudioData();
      LITERT_ASSIGN_OR_RETURN(
          auto new_audio_embeddings,
          inputs.GetAudioEmbeddingsPtr().value()->Duplicate());
      new_audio_data->SetEmbeddings(std::move(new_audio_embeddings));
      if (inputs.GetAudioDataPtr().value()->GetPerLayerEmbeddingsPtr().ok()) {
        LITERT_ASSIGN_OR_RETURN(auto new_per_layer_embeddings,
                                inputs.GetAudioDataPtr()
                                    .value()
                                    ->GetPerLayerEmbeddingsPtr()
                                    .value()
                                    ->Duplicate());
        new_audio_data->SetPerLayerEmbeddings(
            std::move(new_per_layer_embeddings));
      }
    }
    auto new_inputs =
        ExecutorInputs(ExecutorTextData(std::move(new_inputs_token_ids)),
                       std::move(new_vision_data), std::move(new_audio_data));

    auto new_prefill_query_params = prefill_params;
    new_prefill_query_params.SetCurrentStep(current_step);
    // If the current_step is pointing at the step following the last
    // processed token after removing the matching tokens, call executor
    // directly.
    if (processed_tokens->TokenCount() == current_step) {
      return llm_executor_->Prefill(new_inputs, new_prefill_query_params);
    }
    // If the updated current_steps_ is still less than the processed tokens
    // size, meaning part of the processed tokens does not match the
    // input_ids.
    // Confirm if the current handler is the longest handler. If not,
    // cloning processed context is required to avoid modifying the
    // processed context of other handlers.
    ASSIGN_OR_RETURN(
        int largest_time_step,
        current_handler_->shared_processed_context()->LongestHandlerTimeStep(
            *llm_executor_));
    if (largest_time_step != current_step) {
      // If the current handler is not the longest handler, retrieve the
      // processed_context for the previous handler, and update the current
      // handler's shared_processed_context.
      RETURN_IF_ERROR(SaveProcessedContextAndSeparateLoadedHandler(
          current_handler_, llm_executor_));
    }
    // Update the current step since the new processed context (set above)
    // might not match the executor's current step, and the processed
    // context may need to be truncated.
    // TODO: b/418002952 - Consider setting the current step within Prefill
    // rather than relying on the caller.
    RETURN_IF_ERROR(llm_executor_->SetCurrentStep(current_step));
    return llm_executor_->Prefill(new_inputs, new_prefill_query_params);
  }

  absl::StatusOr<std::vector<std::vector<int>>> Decode() override {
    return Decode(ExecutorDecodeParams());
  }

  absl::StatusOr<std::vector<std::vector<int>>> Decode(
      const ExecutorDecodeParams& decode_params) override {
    RETURN_IF_ERROR(MaybeTruncateProcessedTokens());
    return llm_executor_->Decode(decode_params);
  }

  absl::Status Decode(const ExecutorInputs& inputs,
                      TensorBuffer& output_logits) override {
    RETURN_IF_ERROR(MaybeTruncateProcessedTokens());
    ASSIGN_OR_RETURN(output_logits, llm_executor_->DecodeLogits(inputs));
    return absl::OkStatus();
  }

  absl::StatusOr<TensorBuffer> DecodeLogits(
      const ExecutorInputs& inputs) override {
    ASSIGN_OR_RETURN(int current_step, llm_executor_->GetCurrentStep());
    ASSIGN_OR_RETURN(const ProcessedTokens* processed_tokens,
                     llm_executor_->GetProcessedTokens());
    // If the current step is pointing at right after the pending token, set
    // the current step to the previous step. This ensures that the current
    // step points to the token to be processed, as expected by
    // llm_executor_->DecodeLogits().
    if (current_step == processed_tokens->TokenCount() &&
        !processed_tokens->GetNextUnprocessedToken().token.empty()) {
      RETURN_IF_ERROR(llm_executor_->SetCurrentStep(current_step - 1));
    }
    RETURN_IF_ERROR(MaybeTruncateProcessedTokens());
    return llm_executor_->DecodeLogits(inputs);
  }

  absl::StatusOr<std::unique_ptr<LlmContext>> CloneContext() const override {
    return llm_executor_->CloneContext();
  }

  absl::Status RestoreContext(
      std::unique_ptr<LlmContext> llm_context) override {
    return llm_executor_->RestoreContext(std::move(llm_context));
  }

  absl::Status UpdateRuntimeConfig(
      const RuntimeConfig& runtime_config) override {
    return llm_executor_->UpdateRuntimeConfig(runtime_config);
  }

  absl::StatusOr<RuntimeConfig> GetRuntimeConfig() const override {
    return llm_executor_->GetRuntimeConfig();
  }

  absl::Status UpdateRuntimeState(const RuntimeState& runtime_state) override {
    return llm_executor_->UpdateRuntimeState(runtime_state);
  }

  absl::StatusOr<RuntimeState> GetRuntimeState() const override {
    return llm_executor_->GetRuntimeState();
  }

  absl::StatusOr<LlmExecutorSettings> GetExecutorSettings() const override {
    return llm_executor_->GetExecutorSettings();
  }

  absl::StatusOr<int> GetCurrentStep() const override {
    return llm_executor_->GetCurrentStep();
  }

  absl::Status SetCurrentStep(int new_step) override {
    return llm_executor_->SetCurrentStep(new_step);
  }

  absl::StatusOr<const ProcessedTokens*> GetProcessedTokens() const override {
    return llm_executor_->GetProcessedTokens();
  }

  absl::Status LoadLoRA(uint32_t lora_id,
                        const ModelAssets& model_assets) override {
    return llm_executor_->LoadLoRA(lora_id, model_assets);
  }

  absl::Status Reset() override { return llm_executor_->Reset(); }

  absl::StatusOr<int> GetVocabSize() override {
    return llm_executor_->GetVocabSize();
  }

 private:
  absl::Status MaybeTruncateProcessedTokens() {
    if (current_handler_ == nullptr) {
      return absl::OkStatus();
    }
    ASSIGN_OR_RETURN(int current_step, llm_executor_->GetCurrentStep());
    ASSIGN_OR_RETURN(const ProcessedTokens* processed_tokens,
                     llm_executor_->GetProcessedTokens());
    if (processed_tokens->TokenCount() == current_step) {
      return absl::OkStatus();
    }

    // Confirm if the current handler is the longest handler. If not,
    // cloning processed context is required to avoid modifying the
    // processed context of other handlers.
    ASSIGN_OR_RETURN(
        int largest_time_step,
        current_handler_->shared_processed_context()->LongestHandlerTimeStep(
            *llm_executor_));
    if (largest_time_step != current_step) {
      // If the current handler is not the longest handler, retrieve the
      // processed_context for the previous handler, and update the current
      // handler's shared_processed_context.
      RETURN_IF_ERROR(SaveProcessedContextAndSeparateLoadedHandler(
          current_handler_, llm_executor_));
    }
    // Update the current step since the new processed context (set above)
    // might not match the executor's current step, and the processed
    // context may need to be truncated.
    // TODO: b/418002952 - Consider setting the current step within Decode
    // rather than relying on the caller.
    return llm_executor_->SetCurrentStep(current_step);
  }

  // The current context handler;
  std::shared_ptr<ContextHandler> current_handler_;

  // The executor.
  std::shared_ptr<LlmExecutor> llm_executor_;

  // The mutex lock.
  MovableMutexLock lock_;
};

std::optional<uint32_t> ResourceManager::AssignLoraId(
    std::string lora_path, bool has_scoped_lora_file) {
  if (lora_path.empty() && !has_scoped_lora_file) {
    return std::nullopt;
  }
  std::optional<uint32_t> lora_id;
  // If this session is using a new lora, assign a new unique lora id. Else,
  // assign the corresponding lora id according to the provided lora path in
  // the session config.
  if (!lora_path.empty()) {
    // Lora provided by both path and scoped file will use lora path as the
    // reference key if provided.
    if (lora_hash_to_id_.find(lora_path) == lora_hash_to_id_.end()) {
      // If the lora is new, assign the id.
      lora_hash_to_id_[lora_path] = lora_hash_to_id_.size();
    }
    lora_id = lora_hash_to_id_[lora_path];
  } else if (has_scoped_lora_file) {
    // Lora provided by scoped file but without lora path will be assumed to
    // be used only once. Assign a unique id for this session only.
    // TODO: b/346421150 - Extend support to map from scoped file to hash
    // key, for multiple same scoped file use case.
    lora_id = lora_hash_to_id_.size();
    lora_hash_to_id_["scoped_lora:" + absl::StrCat(lora_hash_to_id_.size())] =
        lora_id.value();
  }
  return lora_id;
}

absl::Status ResourceManager::MaybeCreateLitertEnv() {
  if (litert_env_ != nullptr) {
    return absl::OkStatus();
  }
  LITERT_ASSIGN_OR_RETURN(
      auto new_litert_env,
      litert::Environment::Create(std::vector<litert::Environment::Option>()));
  backup_litert_env_ =
      std::make_unique<litert::Environment>(std::move(new_litert_env));
  litert_env_ = backup_litert_env_.get();
  return absl::OkStatus();
}

absl::StatusOr<std::unique_ptr<ContextHandler>>
ResourceManager::CreateContextHandler(const SessionConfig& session_config) {
  // TODO: b/462499294 -
  //   1. Check if lora is loaded or not.
  //   2. Get the lora id.
  //   3. If lora is not loaded, load the lora.

  // Check if the lora is already loaded.
  // TODO: b/462499294 - Use the real lora path.
  bool lora_is_loaded =
      lora_hash_to_id_.find("fake_lora_path") != lora_hash_to_id_.end();

  // Find the lora id. If lora_id is not nullopt, it means the lora is used.
  std::optional<uint32_t> lora_id = AssignLoraId(
      /*lora_path=*/"",
      /*has_scoped_lora_file=*/session_config.GetScopedLoraFile() != nullptr);

  // If lora is used and not loaded, load the lora.
  if (lora_id.has_value() && !lora_is_loaded) {
    RET_CHECK(session_config.GetScopedLoraFile() != nullptr);
    ASSIGN_OR_RETURN(ModelAssets model_assets,
                     ModelAssets::Create(session_config.GetScopedLoraFile(),
                                         /*model_path=*/""));
    MovableMutexLock lock(&executor_mutex_);
    RETURN_IF_ERROR(llm_executor_->LoadLoRA(lora_id.value(), model_assets));
  }
  // TODO: b/462517405 - Remove this conversion from SamplerParams to
  // SamplerParams once the SamplerParams is cleaned up.
  odml::infra::proto::SamplerParameters sampler_params;
  switch (session_config.GetSamplerParams().type()) {
    case proto::SamplerParameters::TYPE_UNSPECIFIED: {
      sampler_params.set_type(
          odml::infra::proto::SamplerParameters::TYPE_UNSPECIFIED);
      break;
    }
    case proto::SamplerParameters::TOP_K: {
      sampler_params.set_type(odml::infra::proto::SamplerParameters::TOP_K);
      break;
    }
    case proto::SamplerParameters::TOP_P: {
      sampler_params.set_type(odml::infra::proto::SamplerParameters::TOP_P);
      break;
    }
    case proto::SamplerParameters::GREEDY: {
      sampler_params.set_type(odml::infra::proto::SamplerParameters::GREEDY);
      break;
    }
    default:
      return absl::InvalidArgumentError(
          absl::StrCat("Unsupported sampler type: ",
                       session_config.GetSamplerParams().type()));
  }
  sampler_params.set_k(session_config.GetSamplerParams().k());
  sampler_params.set_p(session_config.GetSamplerParams().p());
  sampler_params.set_temperature(
      session_config.GetSamplerParams().temperature());

  auto runtime_config = RuntimeConfig{
      .sampler_params = sampler_params,
      .output_heads = session_config.GetNumOutputCandidates(),
      // b/368348506 - Make tokens_per_decode configurable.
      .tokens_per_decode = 1,
  };

  std::unique_ptr<litert::lm::LlmContext> llm_context;
  {
    MovableMutexLock lock(&executor_mutex_);
    ASSIGN_OR_RETURN(llm_context,
                     llm_executor_->CreateNewContext(
                         std::move(lora_id), std::move(runtime_config)));
  }
  std::unique_ptr<AudioContext> audio_context;
  if (session_config.AudioModalityEnabled()) {
    RETURN_IF_ERROR(TryLoadingAudioExecutor());
    ASSIGN_OR_RETURN(auto audio_executor, AcquireAudioExecutor());
    auto audio_executor_properties =
        audio_executor->GetAudioExecutorProperties();
    if (audio_executor_properties.ok()) {
      if (audio_executor_properties->is_streaming_model) {
        ASSIGN_OR_RETURN(audio_context, audio_executor->CreateNewContext());
      }
    } else if (!absl::IsUnimplemented(audio_executor_properties.status())) {
      return audio_executor_properties.status();
    }
  }
  return ContextHandler::Create(std::move(llm_context),
                                std::move(audio_context));
}

absl::StatusOr<std::unique_ptr<ContextHandler>>
ResourceManager::CloneContextHandler(
    std::shared_ptr<const ContextHandler> llm_context_handler) {
  RET_CHECK_NE(llm_context_handler, nullptr)
      << "The provided context handler should not be null.";

  RuntimeConfig runtime_config;
  RuntimeState runtime_state;

  // If the context handler has the runtime config and runtime state, use
  // them directly.
  if (llm_context_handler->HasRuntimeConfig() &&
      llm_context_handler->HasRuntimeState()) {
    ASSIGN_OR_RETURN(runtime_config, llm_context_handler->GetRuntimeConfig());
    ASSIGN_OR_RETURN(runtime_state, llm_context_handler->GetRuntimeState());
  } else {
    // Otherwise, assume the context handler is loaded by the manager to the
    // executor, and get the runtime config and runtime state from the
    // executor.
    MovableMutexLock lock(&executor_mutex_);
    RET_CHECK_EQ(current_handler_, llm_context_handler)
        << "The provided context handler does not have the runtime config "
           "and "
           "runtime state, assuming it is loaded by the manager, but the "
           "manager does not have the same handler.";
    ASSIGN_OR_RETURN(runtime_config, llm_executor_->GetRuntimeConfig());
    ASSIGN_OR_RETURN(runtime_state, llm_executor_->GetRuntimeState());
  }
  auto processed_context = llm_context_handler->shared_processed_context();

  std::unique_ptr<AudioContext> audio_context;
  if (llm_context_handler->HasAudioContext()) {
    ASSIGN_OR_RETURN(auto audio_executor, AcquireAudioExecutor());
    ASSIGN_OR_RETURN(
        audio_context,
        audio_executor->CloneContext(llm_context_handler->GetAudioContext()));
  }
  return ContextHandler::Bundle(
      processed_context, std::make_unique<RuntimeConfig>(runtime_config),
      std::make_unique<RuntimeState>(runtime_state), std::move(audio_context));
}

absl::StatusOr<std::unique_ptr<LlmExecutor>>
ResourceManager::AcquireExecutor() {
  MovableMutexLock lock(&executor_mutex_);

  if (llm_executor_ == nullptr) {
    return absl::InvalidArgumentError(
        "Llm executor should not be null, please do not delete the shared "
        "executor "
        "in ResourceManager at any time.");
  }

  return std::make_unique<LockedLlmExecutor>(llm_executor_, std::move(lock));
}

absl::StatusOr<std::unique_ptr<LlmExecutor>>
ResourceManager::AcquireExecutorWithContextHandler(
    std::shared_ptr<ContextHandler> new_context_handler) {
  RET_CHECK_NE(new_context_handler, nullptr)
      << "The provided context handler should not be null.";

  MovableMutexLock lock(&executor_mutex_);
  RET_CHECK_NE(llm_executor_, nullptr) << "Llm executor should not be null, "
                                          "please do not delete the shared "
                                          "executor in ResourceManager at "
                                          "any time.";

  // If the new handler is the same as the current handler, return the
  // executor directly.
  if (new_context_handler == current_handler_) {
    return std::make_unique<LockedLlmExecutor>(llm_executor_, std::move(lock),
                                               current_handler_);
  }

  // If both handler are sharing the same processed context, save the
  // runtime config and runtime state back to the current handler. Then
  // update the executor with the new handler.
  if (current_handler_ != nullptr &&
      new_context_handler->shared_processed_context() ==
          current_handler_->shared_processed_context()) {
    ASSIGN_OR_RETURN(auto current_runtime_config,
                     llm_executor_->GetRuntimeConfig());
    ASSIGN_OR_RETURN(auto current_runtime_state,
                     llm_executor_->GetRuntimeState());
    RETURN_IF_ERROR(current_handler_->SetRuntimeConfig(
        std::make_unique<RuntimeConfig>(current_runtime_config)));
    RETURN_IF_ERROR(current_handler_->SetRuntimeState(
        std::make_unique<RuntimeState>(current_runtime_state)));

    ASSIGN_OR_RETURN(auto new_runtime_config,
                     new_context_handler->RetrieveRuntimeConfig());
    ASSIGN_OR_RETURN(auto new_runtime_state,
                     new_context_handler->RetrieveRuntimeState());
    RETURN_IF_ERROR(llm_executor_->UpdateRuntimeConfig(*new_runtime_config));
    RETURN_IF_ERROR(llm_executor_->UpdateRuntimeState(*new_runtime_state));
  } else {
    // If the new handler is not sharing the same processed context with the
    // current handler, clone the processed context to the new handler. Then
    // restore the executor with the new LlmContext.
    if (current_handler_ != nullptr) {
      ASSIGN_OR_RETURN(auto current_llm_context, llm_executor_->CloneContext());
      ASSIGN_OR_RETURN(auto current_runtime_config,
                       current_llm_context->RetrieveRuntimeConfig());
      ASSIGN_OR_RETURN(auto current_runtime_state,
                       current_llm_context->RetrieveRuntimeState());
      ASSIGN_OR_RETURN(auto current_processed_context,
                       current_llm_context->RetrieveProcessedContext());

      RETURN_IF_ERROR(current_handler_->SetRuntimeConfig(
          std::move(current_runtime_config)));
      RETURN_IF_ERROR(
          current_handler_->SetRuntimeState(std::move(current_runtime_state)));
      RETURN_IF_ERROR(
          current_handler_->shared_processed_context()->SetProcessedContext(
              std::move(current_processed_context)));
    }

    ASSIGN_OR_RETURN(auto new_runtime_config,
                     new_context_handler->RetrieveRuntimeConfig());
    ASSIGN_OR_RETURN(auto new_runtime_state,
                     new_context_handler->RetrieveRuntimeState());
    ASSIGN_OR_RETURN(auto new_processed_context,
                     new_context_handler->shared_processed_context()
                         ->RetrieveProcessedContext());
    auto llm_context = std::make_unique<LlmContext>(
        std::move(new_processed_context), std::move(new_runtime_config),
        std::move(new_runtime_state));
    RETURN_IF_ERROR(llm_executor_->RestoreContext(std::move(llm_context)));
  }

  // If the current handler has an audio context, update and save the audio
  // context to the current handler.
  if (current_handler_ != nullptr) {
    // If the current handler has an audio context, update it from audio
    // executor and save it back to the current handler.
    if (current_handler_->HasAudioContext()) {
      ASSIGN_OR_RETURN(auto audio_executor, AcquireAudioExecutor());
      ASSIGN_OR_RETURN(auto current_audio_context,
                       audio_executor->CloneContext());
      RETURN_IF_ERROR(
          current_handler_->SetAudioContext(std::move(current_audio_context)));
    }
    // If the new handler has an audio context, audio executor will restore
    // the audio context from the new handler.
    if (new_context_handler->HasAudioContext()) {
      ASSIGN_OR_RETURN(auto audio_executor, AcquireAudioExecutor());
      ASSIGN_OR_RETURN(
          auto audio_context_cloned,
          audio_executor->CloneContext(new_context_handler->GetAudioContext()));
      RETURN_IF_ERROR(
          audio_executor->RestoreContext(std::move(audio_context_cloned)));
    }
  }

  current_handler_ = new_context_handler;

  return std::make_unique<LockedLlmExecutor>(llm_executor_, std::move(lock),
                                             current_handler_);
}

absl::Status ResourceManager::TryLoadingVisionExecutor() {
  return absl::InvalidArgumentError(
      "Vision executor backend is not supported.");
}

absl::StatusOr<std::unique_ptr<VisionExecutor>>
ResourceManager::AcquireVisionExecutor() {
  MovableMutexLock lock(&vision_executor_mutex_);
  if (vision_executor_ == nullptr) {
    return absl::InvalidArgumentError(
        "Vision executor should not be null, please TryLoadingVisionExecutor() "
        "first.");
  }
  return std::make_unique<LockedVisionExecutor>(vision_executor_,
                                                std::move(lock));
}

absl::Status ResourceManager::TryLoadingAudioExecutor() {
  bool is_llm_gpu_artisan = false;
  if (audio_executor_settings_ && audio_executor_settings_->GetBackend() ==
                                      litert::lm::Backend::GPU_ARTISAN) {
    RET_CHECK(llm_executor_settings_.has_value());
    is_llm_gpu_artisan =
        (llm_executor_settings_->GetBackend() == Backend::GPU_ARTISAN);
  }

  absl::MutexLock lock(audio_executor_mutex_);
  if (audio_executor_ != nullptr) {
    return absl::OkStatus();
  }
  if (!audio_executor_settings_) {
    return absl::InvalidArgumentError("Audio options should not be null.");
  }
  if (audio_executor_settings_->GetBackend() == litert::lm::Backend::CPU ||
      audio_executor_settings_->GetBackend() == litert::lm::Backend::GPU) {
    return absl::InvalidArgumentError(
        "Audio executor backend is not supported.");
  } else {
    return absl::InvalidArgumentError(
        "Audio executor backend is not supported.");
  }
  return absl::OkStatus();
}

absl::StatusOr<std::unique_ptr<AudioExecutor>>
ResourceManager::AcquireAudioExecutor() {
  MovableMutexLock lock(&audio_executor_mutex_);
  if (audio_executor_ == nullptr) {
    return absl::InvalidArgumentError(
        "Audio executor should not be null, please TryLoadingAudioExecutor() "
        "first.");
  }
  return std::make_unique<LockedAudioExecutor>(audio_executor_,
                                               std::move(lock));
}

absl::StatusOr<std::unique_ptr<ResourceManager>> ResourceManager::Create(
    ModelResources* absl_nullable model_resources,
    std::unique_ptr<LlmExecutor> absl_nonnull llm_executor,
    std::unique_ptr<VisionExecutorSettings> absl_nullable
        vision_executor_settings,
    std::unique_ptr<litert::lm::AudioExecutorSettings> absl_nullable
        audio_executor_settings,
    ::litert::Environment* absl_nullable litert_env,
    std::unique_ptr<AudioExecutor> absl_nullable audio_executor) {
  if (llm_executor == nullptr) {
    return absl::InvalidArgumentError("Llm executor is null.");
  }
  ASSIGN_OR_RETURN(LlmExecutorSettings llm_executor_settings,
                   llm_executor->GetExecutorSettings());
  auto llm_resource_manager = std::make_unique<ResourceManager>(
      model_resources, std::move(llm_executor),
      std::move(vision_executor_settings), std::move(audio_executor_settings),
      std::move(llm_executor_settings), litert_env, std::move(audio_executor));
  return llm_resource_manager;
}

absl::StatusOr<AudioExecutorProperties>
ResourceManager::GetAudioExecutorProperties() {
  RETURN_IF_ERROR(TryLoadingAudioExecutor());
  MovableMutexLock lock(&audio_executor_mutex_);
  return audio_executor_->GetAudioExecutorProperties();
}

absl::StatusOr<VisionExecutorProperties>
ResourceManager::GetVisionExecutorProperties() {
  RETURN_IF_ERROR(TryLoadingVisionExecutor());
  absl::MutexLock lock(vision_executor_mutex_);
  return vision_executor_->GetVisionExecutorProperties();
}

}  // namespace litert::lm