| // Copyright 2025 The ODML Authors. | |
| // | |
| // Licensed under the Apache License, Version 2.0 (the "License"); | |
| // you may not use this file except in compliance with the License. | |
| // You may obtain a copy of the License at | |
| // | |
| // http://www.apache.org/licenses/LICENSE-2.0 | |
| // | |
| // Unless required by applicable law or agreed to in writing, software | |
| // distributed under the License is distributed on an "AS IS" BASIS, | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| // See the License for the specific language governing permissions and | |
| // limitations under the License. | |
| namespace litert::lm { | |
| namespace { | |
| constexpr absl::string_view kPathToModel1 = "\\path\\to\\model1"; | |
| constexpr absl::string_view kPathToModel1Tflite = "\\path\\to\\model1.tflite"; | |
| constexpr absl::string_view kPathToModel1TfliteCache = | |
| "\\path\\to\\model1.tflite.cache"; | |
| constexpr absl::string_view kPathToCache = "\\path\\to\\cache"; | |
| constexpr absl::string_view kWeightCachePath = "\\weight\\cache\\path"; | |
| constexpr absl::string_view kWeightCachePathWithSeparator = | |
| "\\weight\\cache\\path\\"; | |
| constexpr absl::string_view kWeightCachePathFile = | |
| "\\weight\\cache\\path\\model1.tflite.cache"; | |
| constexpr absl::string_view kWeightCachePathXnnpackFile = | |
| "\\weight\\cache\\path\\model1.tflite.xnnpack_cache"; | |
| constexpr absl::string_view kModel1TfliteCustomSuffix = | |
| "\\path\\to\\model1.tflite.custom_suffix"; | |
| constexpr absl::string_view kPathToModel1 = "/path/to/model1"; | |
| constexpr absl::string_view kPathToModel1Tflite = "/path/to/model1.tflite"; | |
| constexpr absl::string_view kPathToModel1TfliteCache = | |
| "/path/to/model1.tflite.cache"; | |
| constexpr absl::string_view kPathToCache = "/path/to/cache"; | |
| constexpr absl::string_view kWeightCachePath = "/weight/cache/path"; | |
| constexpr absl::string_view kWeightCachePathWithSeparator = | |
| "/weight/cache/path/"; | |
| constexpr absl::string_view kWeightCachePathFile = | |
| "/weight/cache/path/model1.tflite.cache"; | |
| constexpr absl::string_view kWeightCachePathXnnpackFile = | |
| "/weight/cache/path/model1.tflite.xnnpack_cache"; | |
| constexpr absl::string_view kModel1TfliteCustomSuffix = | |
| "/path/to/model1.tflite.custom_suffix"; | |
| using absl::StatusCode::kInvalidArgument; | |
| using ::testing::VariantWith; | |
| using ::testing::status::StatusIs; | |
| TEST(LlmExecutorConfigTest, Backend) { | |
| Backend backend; | |
| std::stringstream oss; | |
| backend = Backend::CPU_ARTISAN; | |
| oss << backend; | |
| EXPECT_EQ(oss.str(), "CPU_ARTISAN"); | |
| backend = Backend::GPU_ARTISAN; | |
| oss.str(""); | |
| oss << backend; | |
| EXPECT_EQ(oss.str(), "GPU_ARTISAN"); | |
| backend = Backend::GPU; | |
| oss.str(""); | |
| oss << backend; | |
| EXPECT_EQ(oss.str(), "GPU"); | |
| backend = Backend::CPU; | |
| oss.str(""); | |
| oss << backend; | |
| EXPECT_EQ(oss.str(), "CPU"); | |
| backend = Backend::GOOGLE_TENSOR_ARTISAN; | |
| oss.str(""); | |
| oss << backend; | |
| EXPECT_EQ(oss.str(), "GOOGLE_TENSOR_ARTISAN"); | |
| backend = Backend::NPU; | |
| oss.str(""); | |
| oss << backend; | |
| EXPECT_EQ(oss.str(), "NPU"); | |
| } | |
| TEST(LlmExecutorConfigTest, StringToBackend) { | |
| ASSERT_OK_AND_ASSIGN(auto backend, GetBackendFromString("cpu_artisan")); | |
| EXPECT_EQ(backend, Backend::CPU_ARTISAN); | |
| ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("gpu_artisan")); | |
| EXPECT_EQ(backend, Backend::GPU_ARTISAN); | |
| ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("gpu")); | |
| EXPECT_EQ(backend, Backend::GPU); | |
| ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("cpu")); | |
| EXPECT_EQ(backend, Backend::CPU); | |
| ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("google_tensor_artisan")); | |
| EXPECT_EQ(backend, Backend::GOOGLE_TENSOR_ARTISAN); | |
| ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("npu")); | |
| EXPECT_EQ(backend, Backend::NPU); | |
| } | |
| TEST(LlmExecutorConfigTest, ActivationDataType) { | |
| ActivationDataType act; | |
| std::stringstream oss; | |
| act = ActivationDataType::FLOAT32; | |
| oss << act; | |
| EXPECT_EQ(oss.str(), "FLOAT32"); | |
| act = ActivationDataType::FLOAT16; | |
| oss.str(""); | |
| oss << act; | |
| EXPECT_EQ(oss.str(), "FLOAT16"); | |
| } | |
| TEST(LlmExecutorConfigTest, FakeWeightsMode) { | |
| FakeWeightsMode fake_weights_mode; | |
| std::stringstream oss; | |
| fake_weights_mode = FakeWeightsMode::FAKE_WEIGHTS_NONE; | |
| oss << fake_weights_mode; | |
| EXPECT_EQ(oss.str(), "FAKE_WEIGHTS_NONE"); | |
| fake_weights_mode = FakeWeightsMode::FAKE_WEIGHTS_8BITS_ALL_LAYERS; | |
| oss.str(""); | |
| oss << fake_weights_mode; | |
| EXPECT_EQ(oss.str(), "FAKE_WEIGHTS_8BITS_ALL_LAYERS"); | |
| fake_weights_mode = FakeWeightsMode::FAKE_WEIGHTS_ATTN_8_FFN_4_EMB_4; | |
| oss.str(""); | |
| oss << fake_weights_mode; | |
| EXPECT_EQ(oss.str(), "FAKE_WEIGHTS_ATTN_8_FFN_4_EMB_4"); | |
| } | |
| TEST(LlmExecutorConfigTest, FileFormat) { | |
| std::stringstream oss; | |
| oss.str(""); | |
| oss << FileFormat::TFLITE; | |
| EXPECT_EQ(oss.str(), "TFLITE"); | |
| oss.str(""); | |
| oss << FileFormat::TASK; | |
| EXPECT_EQ(oss.str(), "TASK"); | |
| oss.str(""); | |
| oss << FileFormat::LITERT_LM; | |
| EXPECT_EQ(oss.str(), "LITERT_LM"); | |
| } | |
| TEST(LlmExecutorConfigTest, ModelAssets) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1); | |
| ASSERT_OK(model_assets); | |
| std::stringstream oss; | |
| oss << *model_assets; | |
| const std::string expected_output = | |
| absl::StrCat("model_path: ", kPathToModel1, | |
| "\nfake_weights_mode: FAKE_WEIGHTS_NONE\n"); | |
| EXPECT_EQ(oss.str(), expected_output); | |
| } | |
| GpuArtisanConfig CreateGpuArtisanConfig() { | |
| GpuArtisanConfig config; | |
| config.num_output_candidates = 1; | |
| config.wait_for_weight_uploads = true; | |
| config.num_decode_steps_per_sync = 3; | |
| config.sequence_batch_size = 16; | |
| config.supported_lora_ranks = {4, 16}; | |
| config.max_top_k = 40; | |
| config.enable_decode_logits = true; | |
| config.use_submodel = true; | |
| return config; | |
| } | |
| TEST(LlmExecutorConfigTest, GpuArtisanConfig) { | |
| GpuArtisanConfig config = CreateGpuArtisanConfig(); | |
| std::stringstream oss; | |
| oss << config; | |
| const std::string expected_output = R"(num_output_candidates: 1 | |
| wait_for_weight_uploads: 1 | |
| num_decode_steps_per_sync: 3 | |
| sequence_batch_size: 16 | |
| supported_lora_ranks: vector of 2 elements: [4, 16] | |
| max_top_k: 40 | |
| enable_decode_logits: 1 | |
| enable_external_embeddings: 0 | |
| use_submodel: 1 | |
| )"; | |
| EXPECT_EQ(oss.str(), expected_output); | |
| } | |
// Verifies that a fully-populated LlmExecutorSettings streams every field,
// including the nested backend config, in a stable human-readable layout.
TEST(LlmExecutorConfigTest, LlmExecutorSettings) {
  auto model_assets = ModelAssets::Create(kPathToModel1);
  ASSERT_OK(model_assets);
  ASSERT_OK_AND_ASSIGN(auto settings,
                       LlmExecutorSettings::CreateDefault(
                           *std::move(model_assets), Backend::GPU_ARTISAN));
  // Populate the optional knobs so the printed output covers them all.
  settings.SetBackendConfig(CreateGpuArtisanConfig());
  settings.SetMaxNumTokens(1024);
  settings.SetActivationDataType(ActivationDataType::FLOAT16);
  settings.SetMaxNumImages(1);
  settings.SetCacheDir(std::string(kPathToCache));
  std::stringstream oss;
  oss << settings;
  // The expected text is assembled from raw-string pieces so the path
  // constants stay platform-appropriate.
  const std::string expected_output = absl::StrCat(
      R"(backend: GPU_ARTISAN
backend_config:
num_output_candidates: 1
wait_for_weight_uploads: 1
num_decode_steps_per_sync: 3
sequence_batch_size: 16
supported_lora_ranks: vector of 2 elements: [4, 16]
max_top_k: 40
enable_decode_logits: 1
enable_external_embeddings: 0
use_submodel: 1
max_tokens: 1024
activation_data_type: FLOAT16
max_num_images: 1
lora_rank: 0
cache_dir: )",
      kPathToCache, R"(
cache_file: Not set
litert_dispatch_lib_dir: Not set
model_assets: model_path: )",
      kPathToModel1, R"(
fake_weights_mode: FAKE_WEIGHTS_NONE
advanced_settings: Not set
)");
  EXPECT_EQ(oss.str(), expected_output);
}
// Verifies that once AdvancedSettings is set, the settings stream prints it
// field-by-field instead of "advanced_settings: Not set".
TEST(LlmExecutorConfigTest, LlmExecutorSettingsWithAdvancedSettings) {
  auto model_assets = ModelAssets::Create(kPathToModel1);
  ASSERT_OK(model_assets);
  ASSERT_OK_AND_ASSIGN(auto settings,
                       LlmExecutorSettings::CreateDefault(
                           *std::move(model_assets), Backend::GPU_ARTISAN));
  settings.SetBackendConfig(CreateGpuArtisanConfig());
  settings.SetMaxNumTokens(1024);
  settings.SetActivationDataType(ActivationDataType::FLOAT16);
  settings.SetMaxNumImages(1);
  settings.SetCacheDir(std::string(kPathToCache));
  // gpu_context_low_priority is deliberately left unset so the golden text
  // below shows its "Not set" rendering; every other field is populated.
  settings.SetAdvancedSettings(AdvancedSettings{
      .prefill_batch_sizes = {128, 256},
      .num_output_candidates = 3,
      .configure_magic_numbers = true,
      .verify_magic_numbers = true,
      .clear_kv_cache_before_prefill = false,
      .num_logits_to_print_after_decode = 10,
      .gpu_madvise_original_shared_tensors = true,
      .is_benchmark = true,
      .preferred_device_substr = "nvidia",
      .num_threads_to_upload = 4,
      .num_threads_to_compile = 2,
      .convert_weights_on_gpu = true,
      .wait_for_weights_conversion_complete_in_benchmark = false,
      .optimize_shader_compilation = false,
      .cache_compiled_shaders_only = true,
      .share_constant_tensors = false,
      .sampler_handles_input = false,
      .allow_src_quantized_fc_conv_ops = true,
      .hint_waiting_for_completion = false,
      .enable_speculative_decoding = false,
      .disable_delegate_clustering = false,
  });
  std::stringstream oss;
  oss << settings;
  // Golden output, spliced with the platform-appropriate path constants.
  const std::string expected_output = absl::StrCat(
      R"(backend: GPU_ARTISAN
backend_config:
num_output_candidates: 1
wait_for_weight_uploads: 1
num_decode_steps_per_sync: 3
sequence_batch_size: 16
supported_lora_ranks: vector of 2 elements: [4, 16]
max_top_k: 40
enable_decode_logits: 1
enable_external_embeddings: 0
use_submodel: 1
max_tokens: 1024
activation_data_type: FLOAT16
max_num_images: 1
lora_rank: 0
cache_dir: )",
      kPathToCache, R"(
cache_file: Not set
litert_dispatch_lib_dir: Not set
model_assets: model_path: )",
      kPathToModel1, R"(
fake_weights_mode: FAKE_WEIGHTS_NONE
advanced_settings: prefill_batch_sizes: [128, 256]
num_output_candidates: 3
configure_magic_numbers: 1
verify_magic_numbers: 1
clear_kv_cache_before_prefill: 0
num_logits_to_print_after_decode: 10
gpu_madvise_original_shared_tensors: 1
is_benchmark: 1
preferred_device_substr: nvidia
num_threads_to_upload: 4
num_threads_to_compile: 2
convert_weights_on_gpu: 1
wait_for_weights_conversion_complete_in_benchmark: 0
optimize_shader_compilation: 0
cache_compiled_shaders_only: 1
share_constant_tensors: 0
sampler_handles_input: 0
allow_src_quantized_fc_conv_ops: 1
hint_waiting_for_completion: 0
gpu_context_low_priority: Not set
enable_speculative_decoding: 0
disable_delegate_clustering: 0
)");
  EXPECT_EQ(oss.str(), expected_output);
}
| TEST(LlmExecutorConfigTest, AdvancedSettingsWithGpuContextLowPriority) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, | |
| LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets), Backend::GPU_ARTISAN)); | |
| settings.SetAdvancedSettings(AdvancedSettings{ | |
| .gpu_context_low_priority = true, | |
| }); | |
| std::stringstream oss; | |
| oss << settings; | |
| EXPECT_THAT(oss.str(), ::testing::HasSubstr("gpu_context_low_priority: 1")); | |
| settings.SetAdvancedSettings(AdvancedSettings{ | |
| .gpu_context_low_priority = false, | |
| }); | |
| oss.str(""); | |
| oss << settings; | |
| EXPECT_THAT(oss.str(), ::testing::HasSubstr("gpu_context_low_priority: 0")); | |
| } | |
| TEST(GetWeightCacheFileTest, CacheDirAndModelPath) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1Tflite); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| settings.SetCacheDir(std::string(kWeightCachePath)); | |
| ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); | |
| EXPECT_THAT(weight_cache_file, | |
| VariantWith<std::string>(std::string(kWeightCachePathFile))); | |
| } | |
| TEST(GetWeightCacheFileTest, CacheDirHasTrailingSeparator) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1Tflite); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| settings.SetCacheDir(std::string(kWeightCachePathWithSeparator)); | |
| ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); | |
| EXPECT_THAT(weight_cache_file, | |
| VariantWith<std::string>(std::string(kWeightCachePathFile))); | |
| } | |
| TEST(GetWeightCacheFileTest, CacheDirAndModelPathAndCustomSuffix) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1Tflite); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| settings.SetCacheDir(std::string(kWeightCachePath)); | |
| ASSERT_OK_AND_ASSIGN(auto weight_cache_file, | |
| settings.GetWeightCacheFile(".xnnpack_cache")); | |
| EXPECT_THAT(weight_cache_file, VariantWith<std::string>( | |
| std::string(kWeightCachePathXnnpackFile))); | |
| } | |
| TEST(LlmExecutorConfigTest, ModelPathOnly) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1Tflite); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); | |
| EXPECT_THAT(weight_cache_file, | |
| VariantWith<std::string>(std::string(kPathToModel1TfliteCache))); | |
| } | |
| TEST(GetWeightCacheFileTest, ModelPathAndSuffix) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1Tflite); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| ASSERT_OK_AND_ASSIGN(auto weight_cache_file, | |
| settings.GetWeightCacheFile(".custom_suffix")); | |
| EXPECT_THAT(weight_cache_file, | |
| VariantWith<std::string>(std::string(kModel1TfliteCustomSuffix))); | |
| } | |
| TEST(GetWeightCacheFileTest, PreferScopedCacheFileToCacheDir) { | |
| const auto cache_path = | |
| std::filesystem::path(::testing::SrcDir()) / | |
| "litert_lm/runtime/testdata/test_lm.cache"; | |
| ASSERT_OK_AND_ASSIGN(auto cache_file, ScopedFile::Open(cache_path.string())); | |
| auto shared_cache_file = std::make_shared<ScopedFile>(std::move(cache_file)); | |
| auto model_assets = ModelAssets::Create(kPathToModel1Tflite); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| settings.SetScopedCacheFile(shared_cache_file); | |
| settings.SetCacheDir(std::string(kWeightCachePath)); | |
| ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); | |
| EXPECT_THAT(weight_cache_file, | |
| VariantWith<std::shared_ptr<ScopedFile>>(shared_cache_file)); | |
| } | |
| TEST(GetWeightCacheFileTest, PreferScopedCacheFileToScopedModelFile) { | |
| const auto model_path = | |
| std::filesystem::path(::testing::SrcDir()) / | |
| "litert_lm/runtime/testdata/test_lm.litertlm"; | |
| const auto cache_path = | |
| std::filesystem::path(::testing::SrcDir()) / | |
| "litert_lm/runtime/testdata/test_lm.cache"; | |
| ASSERT_OK_AND_ASSIGN(auto model_file, ScopedFile::Open(model_path.string())); | |
| ASSERT_OK_AND_ASSIGN(auto cache_file, ScopedFile::Open(cache_path.string())); | |
| auto shared_cache_file = std::make_shared<ScopedFile>(std::move(cache_file)); | |
| auto model_assets = | |
| ModelAssets::Create(std::make_shared<ScopedFile>(std::move(model_file))); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| settings.SetScopedCacheFile(shared_cache_file); | |
| ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); | |
| EXPECT_THAT(weight_cache_file, | |
| VariantWith<std::shared_ptr<ScopedFile>>(shared_cache_file)); | |
| } | |
| TEST(GetWeightCacheFileTest, EmptyModelPath) { | |
| auto model_assets = ModelAssets::Create(""); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| settings.SetCacheDir(std::string(kWeightCachePath)); | |
| EXPECT_THAT(settings.GetWeightCacheFile(".xnnpack_cache"), | |
| StatusIs(kInvalidArgument)); | |
| } | |
| TEST(GetWeightCacheFileTest, CacheDisabled) { | |
| const auto cache_path = | |
| std::filesystem::path(::testing::SrcDir()) / | |
| "litert_lm/runtime/testdata/test_lm.cache"; | |
| ASSERT_OK_AND_ASSIGN(auto cache_file, ScopedFile::Open(cache_path.string())); | |
| auto model_assets = ModelAssets::Create(kPathToModel1Tflite); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets))); | |
| settings.SetCacheDir(":nocache"); | |
| // This should be ignored in favor of the explicitly disabled cache dir. | |
| settings.SetScopedCacheFile( | |
| std::make_shared<ScopedFile>(std::move(cache_file))); | |
| EXPECT_THAT(settings.GetWeightCacheFile(), StatusIs(kInvalidArgument)); | |
| } | |
| TEST(LlmExecutorConfigTest, GetBackendConfig) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, | |
| LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets), Backend::GPU_ARTISAN)); | |
| settings.SetBackendConfig(CreateGpuArtisanConfig()); | |
| ASSERT_OK_AND_ASSIGN(auto gpu_config, | |
| settings.GetBackendConfig<GpuArtisanConfig>()); | |
| EXPECT_EQ(gpu_config.num_output_candidates, 1); | |
| EXPECT_TRUE(gpu_config.use_submodel); | |
| // Test setting via MutableBackendConfig | |
| ASSERT_OK_AND_ASSIGN(auto mutable_gpu_config, | |
| settings.MutableBackendConfig<GpuArtisanConfig>()); | |
| mutable_gpu_config.use_submodel = false; | |
| settings.SetBackendConfig(mutable_gpu_config); | |
| ASSERT_OK_AND_ASSIGN(auto updated_gpu_config, | |
| settings.GetBackendConfig<GpuArtisanConfig>()); | |
| EXPECT_FALSE(updated_gpu_config.use_submodel); | |
| EXPECT_THAT(settings.GetBackendConfig<CpuConfig>(), | |
| StatusIs(kInvalidArgument)); | |
| } | |
| TEST(LlmExecutorConfigTest, MutableBackendConfig) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, | |
| LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets), Backend::GPU_ARTISAN)); | |
| settings.SetBackendConfig(CreateGpuArtisanConfig()); | |
| ASSERT_OK_AND_ASSIGN(auto gpu_config, | |
| settings.MutableBackendConfig<GpuArtisanConfig>()); | |
| gpu_config.num_output_candidates = 2; | |
| settings.SetBackendConfig(gpu_config); | |
| ASSERT_OK_AND_ASSIGN(auto gpu_config_after_change, | |
| settings.GetBackendConfig<GpuArtisanConfig>()); | |
| EXPECT_EQ(gpu_config_after_change.num_output_candidates, 2); | |
| EXPECT_THAT(settings.MutableBackendConfig<CpuConfig>(), | |
| StatusIs(kInvalidArgument)); | |
| } | |
| TEST(LlmExecutorConfigTest, SetSupportedLoraRanks) { | |
| auto model_assets = ModelAssets::Create(kPathToModel1); | |
| ASSERT_OK(model_assets); | |
| ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( | |
| *std::move(model_assets), | |
| Backend::GPU_ARTISAN, Backend::GPU)); | |
| EXPECT_EQ(settings.GetSamplerBackend(), Backend::GPU); | |
| } | |
| } // namespace | |
| } // namespace litert::lm | |