File size: 9,413 Bytes
5f923cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
// Copyright 2025 The ODML Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef THIRD_PARTY_ODML_LITERT_LM_RUNTIME_FRAMEWORK_RESOURCE_MANAGEMENT_RESOURCE_MANAGER_H_
#define THIRD_PARTY_ODML_LITERT_LM_RUNTIME_FRAMEWORK_RESOURCE_MANAGEMENT_RESOURCE_MANAGER_H_

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>

#include "absl/base/nullability.h"  // from @com_google_absl
#include "absl/base/thread_annotations.h"  // from @com_google_absl
#include "absl/container/flat_hash_map.h"  // from @com_google_absl
#include "absl/status/status.h"  // from @com_google_absl
#include "absl/status/statusor.h"  // from @com_google_absl
#include "absl/synchronization/mutex.h"  // from @com_google_absl
#include "litert/cc/litert_environment.h"  // from @litert
#include "runtime/components/model_resources.h"
#include "runtime/engine/engine_settings.h"
#include "runtime/engine/io_types.h"
#include "runtime/executor/audio_executor.h"
#include "runtime/executor/audio_executor_settings.h"
#include "runtime/executor/llm_executor.h"
#include "runtime/executor/llm_executor_settings.h"
#include "runtime/executor/vision_executor.h"
#include "runtime/executor/vision_executor_settings.h"
#include "runtime/framework/resource_management/context_handler/context_handler.h"

namespace litert::lm {

// The ResourceManager provides thread-safe access to shared resources such
// as the LlmExecutor, enabling multiple sessions to utilize it concurrently.
class ResourceManager {
 public:
  explicit ResourceManager(
      ModelResources* absl_nullable model_resources,
      std::unique_ptr<LlmExecutor> llm_executor,
      std::unique_ptr<VisionExecutorSettings> vision_executor_settings,
      std::unique_ptr<AudioExecutorSettings> audio_executor_settings,
      LlmExecutorSettings llm_executor_settings,
      ::litert::Environment* absl_nullable litert_env,
      std::unique_ptr<AudioExecutor> audio_executor = nullptr)
      :  // dummy comment to prevent clang-format from moving the next line here
        llm_executor_(std::move(llm_executor)),
        vision_executor_settings_(std::move(vision_executor_settings)),
        audio_executor_(std::move(audio_executor)),
        audio_executor_settings_(std::move(audio_executor_settings)),
        litert_env_(litert_env),
        llm_executor_settings_(std::move(llm_executor_settings)) {}

  // Creates a ResourceManager with the provided llm_executor.
  // Note that the audio_executor is used for testing only (dependency
  // injection)
  static absl::StatusOr<std::unique_ptr<ResourceManager>> Create(
      ModelResources* absl_nullable model_resources,
      std::unique_ptr<LlmExecutor> absl_nonnull llm_executor,
      std::unique_ptr<VisionExecutorSettings> absl_nullable
          vision_executor_settings,
      std::unique_ptr<AudioExecutorSettings> absl_nullable
          audio_executor_settings,
      ::litert::Environment* absl_nullable litert_env,
      std::unique_ptr<AudioExecutor> absl_nullable audio_executor = nullptr);

  ~ResourceManager() = default;

  // Assigns the lora id from the given lora path or scoped file. If no lora is
  // used, will return std::nullopt instead of an uint32_t id.
  // If lora_path is not empty, it will be treated as the hash key, retrieving
  // the corresponding lora id from the lora_hash_to_id_ map if it exists.
  // Otherwise, a unique lora id will be assigned.
  // If lora_path is empty and has_scoped_lora_file is true, a unique lora id
  // will be assigned. Scoped file should be provided under
  // session_config_struct.scoped_lora_file, and the lora will be loaded while
  // calling CreateContextHandler.
  // If lora_path is empty and has_scoped_lora_file is false, std::nullopt will
  // be returned.
  // Noticed: If you intend to reuse a LoRA loaded via a scoped file, please
  // assign a unique, custom lora_path. This lora_path serves as the identifier
  // for the LoRA across all sessions referencing that scoped file.
  std::optional<uint32_t> AssignLoraId(std::string lora_path,
                                       bool has_scoped_lora_file);

  // Creates a new context handler from the provided session config struct.
  // If a session specific lora is provided, the lora will be loaded and the
  // corresponding lora id will be assigned.
  absl::StatusOr<std::unique_ptr<ContextHandler>> CreateContextHandler(
      const SessionConfig& session_config);

  // Clones the context handler.
  // The cloned context handler will have the same shared processed context as
  // the original context handler.
  // The cloned context handler's runtime config and runtime state however will
  // be copied from the original context handler, thus the values will initially
  // be the same, but can be different afterward.
  absl::StatusOr<std::unique_ptr<ContextHandler>> CloneContextHandler(
      std::shared_ptr<const ContextHandler> llm_context_handler);

  // Acquires the executor without any context handler. This function should
  // only be called when the usage of the returned executor does not involve any
  // state updates, e.g. CreateContext, GetCurrentStep(), etc.
  absl::StatusOr<std::unique_ptr<LlmExecutor>> AcquireExecutor()
      ABSL_LOCKS_EXCLUDED(executor_mutex_);

  // Acquires the executor after loading the provided context handle.
  // Typically, this function is called instead of AcquireExecutor() when the
  // usage of the returned executor involves any state updates, e.g. prefill,
  // decode, etc.
  // Note the method try to lock llm_executor_mutex_ and audio_executor_mutex_
  // in order to clone the audio context if needed, thus other methods should
  // not try to acquire the audio executor within the llm executor mutex.
  // TODO(b/483136581): Refactor the locking mechanism.
  absl::StatusOr<std::unique_ptr<LlmExecutor>>
  AcquireExecutorWithContextHandler(
      std::shared_ptr<ContextHandler> new_context_handle)
      ABSL_LOCKS_EXCLUDED(executor_mutex_)
          ABSL_LOCKS_EXCLUDED(audio_executor_mutex_);

  // Try to load the vision executor if the vision executor is not loaded.
  absl::Status TryLoadingVisionExecutor()
      ABSL_LOCKS_EXCLUDED(vision_executor_mutex_);

  // Acquires the vision executor.
  absl::StatusOr<std::unique_ptr<VisionExecutor>> AcquireVisionExecutor()
      ABSL_LOCKS_EXCLUDED(vision_executor_mutex_);

  // Try to load the audio executor if the audio executor is not loaded.
  absl::Status TryLoadingAudioExecutor()
      ABSL_LOCKS_EXCLUDED(audio_executor_mutex_);

  // Acquires the audio executor.
  absl::StatusOr<std::unique_ptr<AudioExecutor>> AcquireAudioExecutor()
      ABSL_LOCKS_EXCLUDED(audio_executor_mutex_);

  // Returns the audio executor properties.
  absl::StatusOr<AudioExecutorProperties> GetAudioExecutorProperties()
      ABSL_LOCKS_EXCLUDED(audio_executor_mutex_);

  // Returns the vision executor properties.
  absl::StatusOr<VisionExecutorProperties> GetVisionExecutorProperties()
      ABSL_LOCKS_EXCLUDED(vision_executor_mutex_);

 private:
  // Creates the litert environment if it is not created yet.
  absl::Status MaybeCreateLitertEnv();

  // Guards the llm_executor_.
  absl::Mutex executor_mutex_;

  // Maintains the gpu executor.
  std::shared_ptr<LlmExecutor> llm_executor_ ABSL_GUARDED_BY(executor_mutex_);

  // Maintains the current llm context.
  std::shared_ptr<ContextHandler> current_handler_
      ABSL_GUARDED_BY(executor_mutex_);

  // Map lora id from hash. If lora is provided by lora path, lora path will be
  // treated as the hash key.
  absl::flat_hash_map<std::string, uint32_t> lora_hash_to_id_;

  // The mutex lock for the vision executor.
  absl::Mutex vision_executor_mutex_;

  std::shared_ptr<VisionExecutor> vision_executor_
      ABSL_GUARDED_BY(vision_executor_mutex_);

  // The vision executor options, needed for loading the vision executor.
  std::unique_ptr<VisionExecutorSettings> vision_executor_settings_;

  // The mutex lock for the audio executor.
  absl::Mutex audio_executor_mutex_;

  std::shared_ptr<AudioExecutor> audio_executor_
      ABSL_GUARDED_BY(audio_executor_mutex_);

  // The audio executor options, needed for loading the audio executor.
  std::unique_ptr<AudioExecutorSettings> audio_executor_settings_;

  // The potential litert compiled model environment for the vision and audio
  // executor.
  ::litert::Environment* absl_nullable litert_env_;

  // The backup litert compiled model environment for the vision and audio
  // executor, created if litert_env is not provided when resource manager is
  // created.
  std::unique_ptr<::litert::Environment> backup_litert_env_;

  // The llm executor settings.
  std::optional<LlmExecutorSettings> llm_executor_settings_;

  friend class LockedLlmExecutor;
};

}  // namespace litert::lm

#endif  // THIRD_PARTY_ODML_LITERT_LM_RUNTIME_FRAMEWORK_RESOURCE_MANAGEMENT_RESOURCE_MANAGER_H_