diff --git a/Genie/Genie/GenieSymbols.default b/Genie/Genie/GenieSymbols.default new file mode 100644 index 0000000000000000000000000000000000000000..4084db46f37f5b3d47b9ed1f7e65938d185786f2 --- /dev/null +++ b/Genie/Genie/GenieSymbols.default @@ -0,0 +1,31 @@ +#============================================================================= +# +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# All Rights Reserved. +# Confidential and Proprietary - Qualcomm Technologies, Inc. +# +#============================================================================= +{ + global: + Genie_getApiMajorVersion*; + Genie_getApiMinorVersion*; + Genie_getApiPatchVersion*; + GenieDialogConfig_createFromJson*; + GenieDialogConfig_free*; + GenieDialog_create*; + GenieDialog_query*; + GenieDialog_tokenQuery*; + GenieDialog_embeddingQuery*; + GenieDialog_save*; + GenieDialog_restore*; + GenieDialog_reset*; + GenieDialog_setLoraStrength*; + GenieDialog_applyLora*; + GenieDialog_free*; + GenieEmbeddingConfig_createFromJson*; + GenieEmbeddingConfig_free*; + GenieEmbedding_create*; + GenieEmbedding_generate*; + GenieEmbedding_free*; + local: *; +}; \ No newline at end of file diff --git a/Genie/Genie/Makefile b/Genie/Genie/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..dab323ec87f1151f78d1f88dcd443636f4d53454 --- /dev/null +++ b/Genie/Genie/Makefile @@ -0,0 +1,57 @@ +#============================================================================= +# +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# All Rights Reserved. +# Confidential and Proprietary - Qualcomm Technologies, Inc. +# +#============================================================================= + +RUST_TARGET := aarch64-linux-android +RUST_SOURCE_DIR := ./src/qualla/tokenizers/rust +# specify compiler +export CXX := clang++-14 +export PATH := $(ANDROID_NDK_ROOT)/toolchains/llvm/prebuilt/linux-x86_64/bin:$(PATH) +.PHONY: all x86 android clean clean_x86 clean_android +.DEFAULT: x86 + +all: x86 android + +x86: build_x86_tokenizer + @echo "-------------------- Building genie for x86 -------------------- " + @$(MAKE) -f make/Makefile.linux-x86_64 CPATH="/usr/include/x86_64-linux-gnu" || (echo "-------------------- genie x86 build failed --------------------"; exit 1; ) + @echo "-------------------- genie x86 build succeeded -------------------- " + +android: check_ndk build_android_tokenizer + @echo "-------------------- Building genie for android -------------------- " + @$(ANDROID_NDK_ROOT)/ndk-build APP_ALLOW_MISSING_DEPS=true APP_ABI="arm64-v8a" NDK_PROJECT_PATH=./ NDK_APPLICATION_MK=make/Application.mk APP_BUILD_SCRIPT=make/Android.mk || (echo "-------------------- genie android build failed --------------------"; exit 1; ) + @$(rename_target_dirs) + @echo "-------------------- genie android build succeeded -------------------- " + +clean: clean_x86 clean_android + +clean_x86: + @$(MAKE) -f make/Makefile.linux-x86_64 clean + +clean_android: + if [ -d "lib/aarch64-android" ]; then rm -rf lib/aarch64-android; fi + if [ -d "obj/local" ]; then rm -rf obj/local; fi + +# utilities +rename_target_dirs = \ + @if [ -d ./lib/aarch64-android ]; then rm -rf ./lib/aarch64-android; fi; \ + find ./obj/local -type d -execdir rename 's/arm64-v8a/aarch64-android/' '{}' \+ \ + && mkdir -p lib \ + && mv ./obj/local/aarch64-android lib/ \ + && mv ./libs/arm64-v8a/libc++_shared.so lib/aarch64-android/ \ + && rm -rf ./libs \ + +check_ndk: +ifeq ($(ANDROID_NDK_ROOT),) + $(error ERROR: ANDROID_NDK_ROOT 
not set, skipping compilation for Android platform(s).)
+endif
+
+build_x86_tokenizer: $(RUST_SOURCE_DIR)/Cargo.toml
+	cargo build --release --manifest-path=$<
+
+build_android_tokenizer: $(RUST_SOURCE_DIR)/Cargo.toml
+	cargo build --release --manifest-path=$< --target=$(RUST_TARGET)
diff --git a/Genie/Genie/README b/Genie/Genie/README
new file mode 100644
index 0000000000000000000000000000000000000000..2ed544b5dd74dabd328ef8ca2cf04aecd46a20d4
--- /dev/null
+++ b/Genie/Genie/README
@@ -0,0 +1,16 @@
+#=============================================================================
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# All Rights Reserved.
+# Confidential and Proprietary - Qualcomm Technologies, Inc.
+#
+#=============================================================================
+
+Genie library source code example
+---------------------------------
+
+The Genie library (libGenie.so / Genie.dll) source code example gives users the ability to rebuild the Genie
+library from source. Note that the Genie library source may be refactored, rewritten, or otherwise modified without
+notice. The Genie C API is the commercially controlled and versioned interface that users should expect to remain
+stable. Please refer to the Genie SDK documentation tutorials at ${SDK_ROOT}/doc/Genie/ for more information on how
+to build the sample code.
\ No newline at end of file
diff --git a/Genie/Genie/make/Android.mk b/Genie/Genie/make/Android.mk
new file mode 100644
index 0000000000000000000000000000000000000000..319f417eb9d4c2be12da0ed39c91719960903ef2
--- /dev/null
+++ b/Genie/Genie/make/Android.mk
@@ -0,0 +1,56 @@
+#=============================================================================
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# All Rights Reserved.
+# Confidential and Proprietary - Qualcomm Technologies, Inc.
+# +#============================================================================= + +LOCAL_PATH := $(call my-dir) +SUPPORTED_TARGET_ABI := arm64-v8a x86 x86_64 + +#============================ Verify Target Info and Application Variables ========================================= +ifneq ($(filter $(TARGET_ARCH_ABI),$(SUPPORTED_TARGET_ABI)),) + ifneq ($(APP_STL), c++_shared) + $(error Unsupported APP_STL: "$(APP_STL)") + endif +else + $(error Unsupported TARGET_ARCH_ABI: '$(TARGET_ARCH_ABI)') +endif + +#============================ Define Common Variables =============================================================== +# PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../../../../../include/QNN +# Include paths +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../include +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../../../../include/Genie +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/qualla/include +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../../../../include/QNN +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../../../../include/QNN/HTP +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/qualla/tokenizers +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/qualla/engines/qnn-api +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/qualla/engines/qnn-cpu +PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/qualla/engines/qnn-htp + +#========================== Define T2T Lib variables ============================================= +include $(CLEAR_VARS) +LOCAL_MODULE := tokenizers_capi +LOCAL_SRC_FILES := ../src/qualla/tokenizers/rust/target/aarch64-linux-android/release/libtokenizers_capi.a +include $(PREBUILT_STATIC_LIBRARY) + +include $(CLEAR_VARS) +LOCAL_C_INCLUDES := $(PACKAGE_C_INCLUDES) +MY_SRC_FILES := $(wildcard $(LOCAL_PATH)/../src/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/dialogs/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/engines/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/engines/qnn-api/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/engines/qnn-cpu/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/engines/qnn-htp/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/utils/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/loggers/*.cpp) +MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/qualla/samplers/*.cpp) + +LOCAL_MODULE := libGenie +LOCAL_SRC_FILES := $(subst make/,,$(MY_SRC_FILES)) +LOCAL_STATIC_LIBRARIES := tokenizers_capi +include $(BUILD_SHARED_LIBRARY) diff --git a/Genie/Genie/make/Application.mk b/Genie/Genie/make/Application.mk new file mode 100644 index 0000000000000000000000000000000000000000..4e0596f856b93970ac937ab0b6302b74306ae05b --- /dev/null +++ b/Genie/Genie/make/Application.mk @@ -0,0 +1,14 @@ +#============================================================================= +# +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# All Rights Reserved. +# Confidential and Proprietary - Qualcomm Technologies, Inc. 
+#
+#=============================================================================
+
+APP_ABI := arm64-v8a
+APP_STL := c++_shared
+APP_PLATFORM := android-21
+APP_MODULES := Genie
+APP_CPPFLAGS += -std=c++2a -O3 -Wall -frtti -fexceptions -fvisibility=hidden -DGENIE_API="__attribute__((visibility(\"default\")))" -DSPILLFILL -DQUALLA_ENGINE_QNN_HTP=TRUE -DQUALLA_ENGINE_QNN_CPU=TRUE -DQUALLA_APPS=OFF -DFMT_HEADER_ONLY -DGENIE_SAMPLE -DQUALLA_INTERNAL_QNN_SDK -DGENIE_SSD_FEATURE -DGENIE_SPD_FEATURE -DGENIE_LADE_FEATURE -DGENIE_MULTISTREAM_FEATURE -DGENIE_LORA_FEATURE -DGENIE_E2T_FEATURE
+APP_LDFLAGS += -lc -lm -ldl -Wl,--version-script=GenieSymbols.default -Wl,--strip-all
diff --git a/Genie/Genie/make/Makefile.linux-x86_64 b/Genie/Genie/make/Makefile.linux-x86_64
new file mode 100644
index 0000000000000000000000000000000000000000..98d4d4a9657c72a5ea359a475eed0b9103339d68
--- /dev/null
+++ b/Genie/Genie/make/Makefile.linux-x86_64
@@ -0,0 +1,192 @@
+#=============================================================================
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# All Rights Reserved.
+# Confidential and Proprietary - Qualcomm Technologies, Inc.
+#
+#=============================================================================
+
+# define relevant directories
+SRC_DIR := src/qualla
+#
+SRC_DIR_GENIE_TOKENIZERS := src/qualla/tokenizers
+#
+SRC_DIR_SAMPLE_DIALOGS := src/qualla/dialogs
+
+# All engines
+SRC_DIR_GENIE_ENGINES := src/qualla/engines
+SRC_DIR_GENIE_QNN_API := src/qualla/engines/qnn-api
+SRC_DIR_GENIE_ENGINES_CPU := src/qualla/engines/qnn-cpu
+SRC_DIR_GENIE_UTILS := src/qualla/utils
+#
+SRC_DIR_GENIE_LOGGERS := src/qualla/loggers
+
+#
+SRC_DIR_GENIE_SAMPLERS := src/qualla/samplers
+
+#
+SRC_DIR_GENIE := src
+
+# Includes
+GENIE_ENGINES_CPU_INCLUDE := src/qualla/engines/qnn-cpu
+GENIE_ENGINES_API_INCLUDE := src/qualla/engines/qnn-api
+GENIE_ENGINES_HTP_INCLUDE := src/qualla/engines/qnn-htp
+GENIE_TOKENIZER_INCLUDE := src/qualla/tokenizers
+
+GENIE_INCLUDE := include
+GENIE_C_API_HEADERS_INCLUDE := ../../../include/Genie
+QUALLA_INCLUDE := src/qualla/include
+QNN_API_INCLUDE := ../../../include/QNN/
+QNN_API_HTP_INCLUDE := $(QNN_API_INCLUDE)/HTP
+
+AR := /usr/bin/ar
+ARFLAGS := rcs
+# Check whether $(CXX) is clang.
+# If not, switch to clang++.
+ifeq ($(shell $(CXX) -v 2>&1 | grep -c "clang version"), 0)
+CXX := clang++
+endif
+
+QNN_TARGET ?= x86_64-linux-clang
+export TARGET_DIR := ./lib/$(QNN_TARGET)
+
+libGenie := $(TARGET_DIR)/libGenie.so
+libtokenizers := src/qualla/tokenizers/rust/target/release/libtokenizers_capi.a
+
+# define target architecture if not previously defined, default is x86
+ifndef TARGET_AARCH_VARS
+TARGET_AARCH_VARS:= -march=x86-64
+endif
+
+.PHONY: linux_x86_64
+.DEFAULT: linux_x86_64
+GENIE_all: $(libGenie)
+
+# Include paths
+INCLUDES += -I$(GENIE_INCLUDE) -I$(QUALLA_INCLUDE) -I$(SRC_DIR_GENIE_TOKENIZERS) -I$(QNN_API_INCLUDE) -I$(GENIE_ENGINES_CPU_INCLUDE) -I$(QNN_API_HTP_INCLUDE) -I$(GENIE_ENGINES_API_INCLUDE) -I$(GENIE_TOKENIZER_INCLUDE) -I$(GENIE_C_API_HEADERS_INCLUDE)
+
+# set compiler flags
+COMMON_CXXFLAGS = -std=c++2a -frtti -fPIC -Wall -pg -pthread -nostdinc++ -stdlib=libc++ -idirafter /usr/lib/llvm-14/include/c++/v1 -nostdinc -idirafter /usr/lib/llvm-14/lib/clang/14.0.0/include/ -idirafter /usr/include $(INCLUDES)
+COMMON_LDFLAGS = -shared -s -fPIC -pthread -L/usr/lib/x86_64-linux-gnu -L./src/qualla/tokenizers/rust/target/release
+
+COMMON_CFLAGS = -nostdinc -idirafter /usr/lib/llvm-14/lib/clang/14.0.0/include/ -idirafter /usr/include
+
+ifdef QNN_DEBUG_ENABLE
+CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O0 -g -DQNN_API="" -DSPILLFILL -DQUALLA_ENGINE_QNN_CPU=TRUE -DQUALLA_APPS=OFF -DFMT_HEADER_ONLY -DGENIE_SAMPLE -DQUALLA_INTERNAL_QNN_SDK -DGENIE_SSD_FEATURE -DGENIE_SPD_FEATURE -DGENIE_LADE_FEATURE -DGENIE_MULTISTREAM_FEATURE -DGENIE_LORA_FEATURE -DGENIE_E2T_FEATURE
+CFLAGS += $(COMMON_CFLAGS)
+LDFLAGS += $(COMMON_LDFLAGS)
+else
+CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O3 -Wno-write-strings -fvisibility=hidden -DGENIE_API="__attribute__((visibility(\"default\")))" -DSPILLFILL -DQUALLA_ENGINE_QNN_CPU=TRUE -DQUALLA_APPS=OFF -DFMT_HEADER_ONLY -DGENIE_SAMPLE -DQUALLA_INTERNAL_QNN_SDK -DGENIE_SSD_FEATURE -DGENIE_SPD_FEATURE -DGENIE_LADE_FEATURE -DGENIE_MULTISTREAM_FEATURE -DGENIE_LORA_FEATURE -DGENIE_E2T_FEATURE
+CFLAGS += $(COMMON_CFLAGS)
+LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto
+endif
+
+# define library sources
+SOURCES_GENIE_CPP := $(wildcard $(SRC_DIR_GENIE)/*.cpp)
+SOURCES := $(wildcard $(SRC_DIR)/*.cpp)
+SOURCES_GENIE_TOKENIZERS := $(wildcard $(SRC_DIR_GENIE_TOKENIZERS)/*.cpp)
+SOURCES_GENIE_QNN_API_CPP := $(wildcard $(SRC_DIR_GENIE_QNN_API)/*.cpp)
+
+SOURCES_GENIE_ENGINES_CPP := $(filter-out $(SRC_DIR_GENIE_ENGINES)/qnn-htp.cpp, $(wildcard $(SRC_DIR_GENIE_ENGINES)/*.cpp))
+SOURCES_GENIE_DIALOGS_CPP := $(wildcard $(SRC_DIR_SAMPLE_DIALOGS)/*.cpp)
+SOURCES_GENIE_ENGINES_CPU_CPP := $(wildcard $(SRC_DIR_GENIE_ENGINES_CPU)/*.cpp)
+SOURCES_GENIE_UTILS_CPP := $(wildcard $(SRC_DIR_GENIE_UTILS)/*.cpp)
+
+
+SOURCES_GENIE_LOGGERS_CPP := $(wildcard $(SRC_DIR_GENIE_LOGGERS)/*.cpp)
+SOURCES_GENIE_SAMPLERS_CPP := $(wildcard $(SRC_DIR_GENIE_SAMPLERS)/*.cpp)
+
+
+# define object directory
+OBJ_ROOT := obj
+OBJ_DIR_QUALLA := obj/$(QNN_TARGET)/qualla
+OBJ_DIR_GENIE := obj/$(QNN_TARGET)/genie
+OBJ_DIR_GENIE_TOKENIZERS := $(OBJ_DIR_QUALLA)/tokenizers
+OBJ_DIR_GENIE_QNN_API := $(OBJ_DIR_QUALLA)/qnn-api
+
+OBJ_DIR_GENIE_DIALOGS := $(OBJ_DIR_QUALLA)/dialogs
+OBJ_DIR_GENIE_ENGINES := $(OBJ_DIR_QUALLA)/engines
+OBJ_DIR_GENIE_UTILS := $(OBJ_DIR_QUALLA)/utils
+OBJ_DIR_GENIE_ENGINES_CPU := $(OBJ_DIR_QUALLA)/engines/qnn-cpu
+$(shell mkdir -p $(OBJ_DIR_GENIE_ENGINES_CPU))
+
+OBJ_DIR_GENIE_LOGGERS := obj/$(QNN_TARGET)/qualla/loggers
+OBJ_DIR_GENIE_SAMPLERS :=
obj/$(QNN_TARGET)/qualla/samplers
+
+$(shell mkdir -p $(OBJ_DIR_GENIE))
+$(shell mkdir -p $(OBJ_DIR_GENIE_LOGGERS))
+$(shell mkdir -p $(OBJ_DIR_GENIE_SAMPLERS))
+
+# setup object files in object directory
+OBJECTS_GENIE := $(patsubst %.cpp,$(OBJ_DIR_GENIE)/%.o,$(foreach x,$(SOURCES_GENIE_CPP),$(notdir $(x))))
+OBJECTS_QUALLA := $(patsubst %.cpp,$(OBJ_DIR_QUALLA)/%.o,$(foreach x,$(SOURCES),$(notdir $(x))))
+OBJECTS_GENIE_TOKENIZERS := $(patsubst %.cpp,$(OBJ_DIR_GENIE_TOKENIZERS)/%.o,$(foreach x,$(SOURCES_GENIE_TOKENIZERS),$(notdir $(x))))
+OBJECTS_GENIE_QNN_API := $(patsubst %.cpp,$(OBJ_DIR_GENIE_QNN_API)/%.o,$(foreach x,$(SOURCES_GENIE_QNN_API_CPP),$(notdir $(x))))
+OBJECTS_GENIE_ENGINES := $(patsubst %.cpp,$(OBJ_DIR_GENIE_ENGINES)/%.o,$(foreach x,$(SOURCES_GENIE_ENGINES_CPP),$(notdir $(x))))
+OBJECTS_GENIE_DIALOGS := $(patsubst %.cpp,$(OBJ_DIR_GENIE_DIALOGS)/%.o,$(foreach x,$(SOURCES_GENIE_DIALOGS_CPP),$(notdir $(x))))
+OBJECTS_GENIE_UTILS := $(patsubst %.cpp,$(OBJ_DIR_GENIE_UTILS)/%.o,$(foreach x,$(SOURCES_GENIE_UTILS_CPP),$(notdir $(x))))
+OBJECTS_GENIE_ENGINES_CPU := $(patsubst %.cpp,$(OBJ_DIR_GENIE_ENGINES_CPU)/%.o,$(foreach x,$(SOURCES_GENIE_ENGINES_CPU_CPP),$(notdir $(x))))
+
+OBJECTS_GENIE_LOGGERS := $(patsubst %.cpp,$(OBJ_DIR_GENIE_LOGGERS)/%.o,$(foreach x,$(SOURCES_GENIE_LOGGERS_CPP),$(notdir $(x))))
+OBJECTS_GENIE_SAMPLERS := $(patsubst %.cpp,$(OBJ_DIR_GENIE_SAMPLERS)/%.o,$(foreach x,$(SOURCES_GENIE_SAMPLERS_CPP),$(notdir $(x))))
+
+LIBS=-ldl
+
+
+# Rule to make shared lib
+.PHONY: libGenie
+libGenie: $(libGenie)
+
+# Implicit rule to compile and link object files
+$(OBJ_DIR_GENIE)/%.o: $(SRC_DIR_GENIE)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_QUALLA)/%.o: $(SRC_DIR)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_TOKENIZERS)/%.o: $(SRC_DIR_GENIE_TOKENIZERS)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_QNN_API)/%.o: $(SRC_DIR_GENIE_QNN_API)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_ENGINES)/%.o: $(SRC_DIR_GENIE_ENGINES)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_DIALOGS)/%.o: $(SRC_DIR_SAMPLE_DIALOGS)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_UTILS)/%.o: $(SRC_DIR_GENIE_UTILS)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_ENGINES_CPU)/%.o: $(SRC_DIR_GENIE_ENGINES_CPU)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_LOGGERS)/%.o: $(SRC_DIR_GENIE_LOGGERS)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+$(OBJ_DIR_GENIE_SAMPLERS)/%.o: $(SRC_DIR_GENIE_SAMPLERS)/%.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+
+# set up resources
+directories := $(TARGET_DIR) $(OBJ_DIR_GENIE) $(OBJ_DIR_GENIE_QNN_API) $(OBJ_DIR_QUALLA) $(OBJ_DIR_GENIE_TOKENIZERS) $(OBJ_DIR_GENIE_ENGINES) $(OBJ_DIR_GENIE_DIALOGS) $(OBJ_DIR_GENIE_UTILS) $(OBJ_DIR_GENIE_ENGINES_CPU) $(OBJ_DIR_GENIE_LOGGERS) $(OBJ_DIR_GENIE_SAMPLERS)
+
+# Compile
+$(libGenie): $(OBJECTS_GENIE) $(OBJECTS_QUALLA) $(OBJECTS_GENIE_QNN_API) $(OBJECTS_GENIE_TOKENIZERS) $(OBJECTS_GENIE_ENGINES) $(OBJECTS_GENIE_DIALOGS) $(OBJECTS_GENIE_UTILS) $(OBJECTS_GENIE_ENGINES_CPU) $(OBJECTS_GENIE_LOGGERS) $(OBJECTS_GENIE_SAMPLERS) | $(directories)
+	$(CXX) $(CXXFLAGS) -shared -o $@ $^ $(LIBS) $(libtokenizers)
+
+
+# rule for object directory resource
+$(OBJECTS_GENIE): | $(OBJ_DIR_GENIE)
+$(OBJECTS_QUALLA): | $(OBJ_DIR_QUALLA)
+$(OBJECTS_GENIE_TOKENIZERS): | $(OBJ_DIR_GENIE_TOKENIZERS)
+$(OBJECTS_GENIE_QNN_API): | $(OBJ_DIR_GENIE_QNN_API)
+$(OBJECTS_GENIE_ENGINES): | $(OBJ_DIR_GENIE_ENGINES)
+$(OBJECTS_GENIE_DIALOGS): | $(OBJ_DIR_GENIE_DIALOGS)
+$(OBJECTS_GENIE_UTILS): |
$(OBJ_DIR_GENIE_UTILS) +$(OBJECTS_GENIE_ENGINES_CPU): | $(OBJ_DIR_GENIE_ENGINES_CPU) +$(OBJECTS_GENIE_LOGGERS): | $(OBJ_DIR_GENIE_LOGGERS) +$(OBJECTS_GENIE_SAMPLERS): | $(OBJ_DIR_GENIE_SAMPLERS) + + +# rule to create directories +$(directories): + mkdir -p $@ + +.PHONY: clean +clean: + rm -rf $(OBJ_ROOT) $(TARGET_DIR) diff --git a/Genie/Genie/src/Dialog.cpp b/Genie/Genie/src/Dialog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e3812e81362e3ccde2a9e96473339b3b760b883c --- /dev/null +++ b/Genie/Genie/src/Dialog.cpp @@ -0,0 +1,1804 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include + +#include "Dialog.hpp" +#include "Exception.hpp" +#include "Macro.hpp" +#include "qualla/detail/json.hpp" +#include "qualla/env.hpp" + +using namespace genie; + +#ifdef _WIN32 +inline std::string libPrefix = ""; +inline std::string libSuffix = ".dll"; +#else +inline std::string libPrefix = "lib"; +inline std::string libSuffix = ".so"; +#endif + +inline std::string getLibName(std::string baseName) { return libPrefix + baseName + libSuffix; } + +//============================================================================= +// Context::Config functions +//============================================================================= + +static void validateContextConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "context config is not an object"); + } + + std::set mandatoryFields{"version", "bos-token", "eos-token", "size", "n-vocab"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing context field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "context"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid context config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "bos-token") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "eos-token") { + JSON_ENFORCE_ARRAY_OR_NUMERIC(); + } else if (item.key() == "eot-token") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "size") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "n-vocab") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "pad-token") { + JSON_ENFORCE_NUMERIC(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown context config key: " + item.key()); + } + } +} + +static void translateContextConfig(const qualla::json& genieConfig, qualla::json& quallaConfig) { + if (genieConfig["dialog"].contains("context")) { + if (genieConfig["dialog"]["context"].contains("bos-token")) { + quallaConfig["context"]["bos-token"] = genieConfig["dialog"]["context"]["bos-token"]; + } + if (genieConfig["dialog"]["context"].contains("eos-token")) { + quallaConfig["context"]["eos-token"] = genieConfig["dialog"]["context"]["eos-token"]; + } + if (genieConfig["dialog"]["context"].contains("eot-token")) { + quallaConfig["context"]["eot-token"] = genieConfig["dialog"]["context"]["eot-token"]; + } + if 
(genieConfig["dialog"]["context"].contains("size")) { + quallaConfig["context"]["size"] = genieConfig["dialog"]["context"]["size"]; + } + if (genieConfig["dialog"]["context"].contains("n-vocab")) { + quallaConfig["context"]["n-vocab"] = genieConfig["dialog"]["context"]["n-vocab"]; + } + if (genieConfig["dialog"]["context"].contains("pad-token")) { + quallaConfig["context"]["pad-token"] = genieConfig["dialog"]["context"]["pad-token"]; + } + } +} + +//============================================================================= +// Sampler::Config functions +//============================================================================= + +static void validateSamplerConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "sampler config is not an object"); + } + + std::set mandatoryFields{"version"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing sampler field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "sampler"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid sampler config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "seed") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "temp") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "top-k") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "top-p") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "greedy") { + JSON_ENFORCE_BOOLEAN(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown sampler config key: " + item.key()); + } + } +} + +static void translateSamplerConfig(const qualla::json& genieConfig, qualla::json& quallaConfig) { + if (genieConfig["dialog"].contains("sampler")) { + quallaConfig["sampler"]["type"] = "basic"; + + if (genieConfig["dialog"]["sampler"].contains("seed")) { + quallaConfig["sampler"]["seed"] = genieConfig["dialog"]["sampler"]["seed"]; + } + if (genieConfig["dialog"]["sampler"].contains("temp")) { + quallaConfig["sampler"]["temp"] = genieConfig["dialog"]["sampler"]["temp"]; + } + + quallaConfig["sampler"]["role"] = "primary"; +#if defined(GENIE_SPD_FEATURE) + if (genieConfig["dialog"]["type"] == "spd") { + quallaConfig["sampler"]["role"] = "target"; + } +#endif + + if (genieConfig["dialog"]["sampler"].contains("top-k")) { + quallaConfig["sampler"]["top-k"] = genieConfig["dialog"]["sampler"]["top-k"]; + } + if (genieConfig["dialog"]["sampler"].contains("top-p")) { + quallaConfig["sampler"]["top-p"] = genieConfig["dialog"]["sampler"]["top-p"]; + } + if (genieConfig["dialog"]["sampler"].contains("greedy")) { + quallaConfig["sampler"]["greedy"] = genieConfig["dialog"]["sampler"]["greedy"]; + } + if (genieConfig["dialog"]["sampler"].contains("seed")) { + quallaConfig["sampler"]["seed"] = genieConfig["dialog"]["sampler"]["seed"]; + } + } +} + +//============================================================================= +// Tokenizer::Config functions +//============================================================================= + +static void validateTokenizerConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "tokenizer config is not an object"); + } + + std::set mandatoryFields{"version", "path"}; + for (const auto& field : 
mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing tokenizer field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "tokenizer"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid tokenizer config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "path") { + JSON_ENFORCE_STRING(); + // Note: the existence of this file is checked by qualla + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Unknown tokenizer config key: " + item.key()); + } + } +} + +static void translateTokenizerConfig(const qualla::json& genieConfig, qualla::json& quallaConfig) { + quallaConfig["tokenizer"] = genieConfig["dialog"]["tokenizer"]["path"]; +} + +//============================================================================= +// Embedding::Config functions +//============================================================================= + +static void validateEmbeddingConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "embedding config is not an object"); + } + + std::set mandatoryFields{"version", "size"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing embedding field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "embedding"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid embedding config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "size") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "datatype") { + JSON_ENFORCE_STRING(); + const std::set supportedTypes = {"float32", "native"}; + if (std::find(supportedTypes.begin(), supportedTypes.end(), item.value()) == + supportedTypes.end()) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Unknown embedding datatype: " + std::string(item.value())); + } + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Unknown embedding config key: " + item.key()); + } + } +} + +static void translateEmbeddingConfig(const qualla::json& genieConfig, qualla::json& quallaConfig) { + if (genieConfig["dialog"].contains("embedding")) { + quallaConfig["context"]["n-embd"] = genieConfig["dialog"]["embedding"]["size"]; + + if (genieConfig["dialog"]["embedding"].contains("datatype")) { + quallaConfig["context"]["embedding-datatype"] = + genieConfig["dialog"]["embedding"]["datatype"]; + } + } +} + +bool position_dim_set = false; +bool rope_theta_set = false; + +//============================================================================= +// Backend::Config functions +//============================================================================= + +static void validateBackendHtpConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "QnnHtp config is not an object"); + } + + std::set mandatoryFields{ + "version", "spill-fill-bufsize", "mmap-budget", "use-mmap", "cpu-mask", "poll"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing QnnHtp field: " + field); + } + } 
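+
+  // For reference, a minimal "QnnHtp" backend section that satisfies the
+  // mandatory-field check above could look like the following sketch
+  // (values are illustrative only, not tuned recommendations):
+  //   "QnnHtp": {
+  //     "version": 1,
+  //     "spill-fill-bufsize": 0,
+  //     "mmap-budget": 0,
+  //     "use-mmap": false,
+  //     "cpu-mask": "0",
+  //     "poll": true
+  //   }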
+ + // component is used in the "ENFORCE" macros + std::string component = "QnnHtp"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid QnnHtp config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "spill-fill-bufsize") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "mmap-budget") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "use-mmap") { + JSON_ENFORCE_BOOLEAN(); +#ifdef _WIN32 + if (item.value() == true) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid QnnHtp config. use-mmap not supported on target"); + } +#endif + } else if (item.key() == "pos-id-dim") { + position_dim_set = true; + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "cpu-mask") { + JSON_ENFORCE_STRING(); + } else if (item.key() == "poll") { + JSON_ENFORCE_BOOLEAN(); + } else if (item.key() == "kv-dim") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "kv-update-method") { + JSON_ENFORCE_STRING(); + } else if (item.key() == "allow-async-init") { + JSON_ENFORCE_BOOLEAN(); + } else if (item.key() == "rope-theta") { + rope_theta_set = true; + JSON_ENFORCE_NUMERIC(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown QnnHtp config key: " + item.key()); + } + } +} + +static void validateBackendGenaiConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "QnnGenAiTransformer config is not an object"); + } + + std::set mandatoryFields{"version"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Missing QnnGenAiTransformer field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "QnnGenAiTransformer"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception( + GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid QnnGenAiTransformer config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "use-mmap") { + JSON_ENFORCE_BOOLEAN(); +#ifdef _WIN32 + if (item.value() == true) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid QnnGenAiTransformer config. 
use-mmap not supported on target"); + } +#endif + } else if (item.key() == "n-logits") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "n-layer") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "n-embd") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "n-heads") { + JSON_ENFORCE_NUMERIC(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Unknown QnnGenAiTransformer config key: " + item.key()); + } + } +} + +static void validateBackendConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "backend config is not an object"); + } + + std::set mandatoryFields{"version", "type"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing backend field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "backend"; + + std::string type; + bool htp = false; + qualla::json htpConfig; + bool genai = false; + qualla::json genaiConfig; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid backend config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "type") { + JSON_ENFORCE_STRING(); + type = item.value().get(); + if (type == "QnnHtp") { + htp = true; + } else if (type == "QnnGenAiTransformer") { + genai = true; + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid backend config: unsupported type: " + item.value().dump()); + } + } else if (item.key() == "extensions") { + JSON_ENFORCE_STRING(); + } else if (item.key() == "QnnHtp") { + JSON_ENFORCE_OBJECT(); + htpConfig = item.value(); + } else if (item.key() == "QnnGenAiTransformer") { + JSON_ENFORCE_OBJECT(); + genaiConfig = item.value(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown backend config key: " + item.key()); + } + } + + if (htp) { + if (!htpConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing QnnHtp dialog config"); + } + validateBackendHtpConfig(htpConfig); + } else { + if (htpConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "QnnHtp backend config for incorrect backend type: " + type); + } + } + + if (genai) { + if (!genaiConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing QnnGenAiTransformer dialog config"); + } + validateBackendGenaiConfig(genaiConfig); + } else { + if (genaiConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "QnnGenAiTransformer backend config for incorrect backend type: " + type); + } + } +} + +//============================================================================= +// Model::Config functions +//============================================================================= + +static void validateLoraAdapterConfig(const qualla::json& config, + LORA_VERSION& specifiedLoraVersion) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "lora adapter config is not an object"); + } + const std::set mandatoryFields{"version", "name"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing lora adapter field: " + field); + } + } + + // component is used in the "ENFORCE" macros + const std::string component = "lora adapter"; + LORA_VERSION configuredLoraVersion = 
LORA_VERSION::GENIE_LORA_VERSION_UNDEFINED;
+  for (auto& item : config.items()) {
+    if (item.key() == "version") {
+      JSON_ENFORCE_NUMERIC();
+      if (item.value().get<int>() != 1) {
+        throw Exception(GENIE_STATUS_ERROR_JSON_VALUE,
+                        "Invalid lora config: unsupported version: " + item.value().dump());
+      }
+    } else if (item.key() == "name") {
+      JSON_ENFORCE_STRING();
+    } else if (item.key() == "bin-sections") {
+      JSON_ENFORCE_ARRAY();
+      configuredLoraVersion = LORA_VERSION::GENIE_LORA_VERSION_V2;  // Adapter occurs with V2
+      for (auto& elem : item.value()) {
+        if (!elem.is_string()) {
+          throw Exception(GENIE_STATUS_ERROR_JSON_VALUE,
+                          "bin-sections must be an array of strings");
+        }
+      }
+    } else if (item.key() == "path") {
+      configuredLoraVersion = LORA_VERSION::GENIE_LORA_VERSION_V1;  // Weights are V1
+      JSON_ENFORCE_STRING();
+      // Note: all directory validations will be done by the NSP engine
+    } else {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "Unknown lora adapter config key: " + item.key());
+    }
+  }
+
+  if (specifiedLoraVersion == LORA_VERSION::GENIE_LORA_VERSION_V1 &&
+      configuredLoraVersion == LORA_VERSION::GENIE_LORA_VERSION_V2) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                    "LoRA Adapters must be used with lora version: 2");
+  } else if (specifiedLoraVersion == LORA_VERSION::GENIE_LORA_VERSION_V2 &&
+             configuredLoraVersion == LORA_VERSION::GENIE_LORA_VERSION_V1) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                    "LoRA Weights must be used with lora version: 1");
+  } else if (configuredLoraVersion == LORA_VERSION::GENIE_LORA_VERSION_UNDEFINED) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Invalid lora config.");
+  }
+}
+
+static void validateLoraConfig(const qualla::json& config) {
+  if (!config.is_object()) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "lora config is not an object");
+  }
+
+  const std::set<std::string> mandatoryFields{"version", "adapters"};
+  for (const auto& field : mandatoryFields) {
+    if (!config.contains(field)) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing lora field: " + field);
+    }
+  }
+
+  // component is used in the "ENFORCE" macros
+  const std::string component = "lora";
+  LORA_VERSION specifiedLoraVersion = LORA_VERSION::GENIE_LORA_VERSION_V2;  // Default is loraV2
+  if (config.find("lora-version") != config.end()) {
+    switch (static_cast<int>(config["lora-version"])) {
+      case 1:
+        specifiedLoraVersion = LORA_VERSION::GENIE_LORA_VERSION_V1;
+        break;
+      case 2:
+        specifiedLoraVersion = LORA_VERSION::GENIE_LORA_VERSION_V2;
+        break;
+      default:
+        specifiedLoraVersion = LORA_VERSION::GENIE_LORA_VERSION_UNDEFINED;
+        break;
+    }
+  }
+
+  for (auto& item : config.items()) {
+    if (item.key() == "version") {
+      JSON_ENFORCE_NUMERIC();
+      if (item.value().get<int>() != 1) {
+        throw Exception(GENIE_STATUS_ERROR_JSON_VALUE,
+                        "Invalid lora config: unsupported version: " + item.value().dump());
+      }
+    } else if (item.key() == "alpha-tensor-name") {
+      JSON_ENFORCE_STRING();
+    } else if (item.key() == "adapters") {
+      JSON_ENFORCE_ARRAY();
+      for (auto& elem : item.value()) {
+        validateLoraAdapterConfig(elem, specifiedLoraVersion);
+      }
+    } else if (item.key() == "lora-version") {  // Optional
+      JSON_ENFORCE_NUMERIC();
+    } else {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown lora config key: " + item.key());
+    }
+  }
+  if (specifiedLoraVersion == LORA_VERSION::GENIE_LORA_VERSION_UNDEFINED) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                    "Unsupported lora version: " + to_string(config["lora-version"]));
+  }
+}
+
+static void validateModelBinaryConfig(const
qualla::json& config) {
+  if (!config.is_object()) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "binary config is not an object");
+  }
+
+  std::set<std::string> mandatoryFields{"version", "ctx-bins"};
+  for (const auto& field : mandatoryFields) {
+    if (!config.contains(field)) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing binary field: " + field);
+    }
+  }
+
+  // component is used in the "ENFORCE" macros
+  std::string component = "binary";
+
+  for (auto& item : config.items()) {
+    if (item.key() == "version") {
+      JSON_ENFORCE_NUMERIC();
+      if (item.value().get<int>() != 1) {
+        throw Exception(GENIE_STATUS_ERROR_JSON_VALUE,
+                        "Invalid binary config: unsupported version: " + item.value().dump());
+      }
+    } else if (item.key() == "ctx-bins") {
+      JSON_ENFORCE_ARRAY();
+      for (auto& elem : item.value()) {
+        if (!elem.is_string()) {
+          throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, "ctx-bins must be an array of strings");
+        }
+      }
+    } else if (item.key() == "lora") {
+      JSON_ENFORCE_OBJECT();
+      validateLoraConfig(item.value());
+    } else {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown binary config key: " + item.key());
+    }
+  }
+}
+
+static void validateModelLibraryConfig(const qualla::json& config) {
+  if (!config.is_object()) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "library config is not an object");
+  }
+
+  std::set<std::string> mandatoryFields{"version", "model-bin"};
+  for (const auto& field : mandatoryFields) {
+    if (!config.contains(field)) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing library field: " + field);
+    }
+  }
+
+  // component is used in the "ENFORCE" macros
+  std::string component = "library";
+
+  for (auto& item : config.items()) {
+    if (item.key() == "version") {
+      JSON_ENFORCE_NUMERIC();
+      if (item.value().get<int>() != 1) {
+        throw Exception(GENIE_STATUS_ERROR_JSON_VALUE,
+                        "Invalid library config: unsupported version: " + item.value().dump());
+      }
+    } else if (item.key() == "model-bin") {
+      JSON_ENFORCE_STRING();
+    } else {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown library config key: " + item.key());
+    }
+  }
+}
+
+static void validateRopeScalingConfig(const qualla::json& config) {
+  // component is used in the "ENFORCE" macros
+  std::string component = "rope-scaling";
+  if (config.is_object()) {
+    std::string ropeType;
+    for (auto& item : config.items()) {
+      if (item.key() == "rope-type") {
+        JSON_ENFORCE_STRING();
+        ropeType = item.value().get<std::string>();
+        if (ropeType != "llama3" && ropeType != "default" && ropeType != "longrope") {
+          throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Rope type not supported: " + ropeType);
+        }
+      } else if (item.key() == "factor" || item.key() == "low-freq-factor" ||
+                 item.key() == "high-freq-factor" ||
+                 item.key() == "original-max-position-embeddings") {
+        JSON_ENFORCE_NUMERIC();
+      } else if (item.key() == "short-factor" || item.key() == "long-factor") {
+        JSON_ENFORCE_ARRAY();
+      } else {
+        throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                        "Rope scaling parameter not supported " + item.key());
+      }
+    }
+  }
+}
+
+static void validatePositionalEncodingConfig(const qualla::json& config) {
+  // component is used in the "ENFORCE" macros
+  std::string component = "positional-encoding";
+  qualla::json ropeScalingConfig;
+  if (config.is_object()) {
+    for (auto& item : config.items()) {
+      if (item.key() == "type") {
+        std::string positionEncodingType = item.value().get<std::string>();
+        if (positionEncodingType != "rope" && positionEncodingType != "absolute" &&
+            positionEncodingType != "alibi") {
+          throw
Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "positional-encoding type not supported"); + } + } else if (item.key() == "rope-dim") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "rope-theta") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "rope-scaling") { + JSON_ENFORCE_OBJECT(); + ropeScalingConfig = item.value(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Unknown positional encoding config key: " + item.key()); + } + } + } + if (position_dim_set) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Specify one config from pos-id-dim and positional-encoding"); + } + if (rope_theta_set) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Specify one config from rope-theta and positional-encoding"); + } + if (ropeScalingConfig.is_object()) { + validateRopeScalingConfig(ropeScalingConfig); + } +} + +static void validateModelConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "model config is not an object"); + } + + std::set mandatoryFields{"version", "type"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing model field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "model"; + + std::string type; + bool binary = false; + qualla::json binaryConfig; + bool library = false; + qualla::json libraryConfig; + qualla::json positionalEncodingConfig; + bool positionalEncoding = false; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid model config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "type") { + JSON_ENFORCE_STRING(); + type = item.value().get(); + if (type == "binary") { + binary = true; + } else if (type == "library") { + library = true; + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid model config: unsupported type: " + item.value().dump()); + } + } else if (item.key() == "binary") { + JSON_ENFORCE_OBJECT(); + binaryConfig = item.value(); + } else if (item.key() == "library") { + JSON_ENFORCE_OBJECT(); + libraryConfig = item.value(); + } else if (item.key() == "positional-encoding") { + JSON_ENFORCE_OBJECT(); + positionalEncodingConfig = item.value(); + positionalEncoding = true; + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown model config key: " + item.key()); + } + } + + if (binary) { + if (!binaryConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing binary model config"); + } + validateModelBinaryConfig(binaryConfig); + } else { + if (binaryConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "binary model config for incorrect model type: " + type); + } + } + + if (library) { + if (!libraryConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing library model config"); + } + validateModelLibraryConfig(libraryConfig); + } else { + if (libraryConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "library model config for incorrect model type: " + type); + } + } + + if (positionalEncoding) { + if (!positionalEncodingConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing Positional encoding config"); + } + validatePositionalEncodingConfig(positionalEncodingConfig); + } else { + if (positionalEncodingConfig.is_object()) { + 
throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "Positional encoding config for incorrect model type: " + type);
+    }
+  }
+}
+
+//=============================================================================
+// Engine::Config functions
+//=============================================================================
+
+static void validateEngineConfig(const qualla::json& config, std::string dialogType) {
+  if (!config.is_object()) {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "engine config is not an object");
+  }
+
+  std::set<std::string> mandatoryFields{"version", "backend", "model", "n-threads"};
+#if defined(GENIE_SPD_FEATURE)
+  if (dialogType == "spd") {
+    mandatoryFields.insert("role");
+  }
+#endif
+  if (dialogType == "kv-share") {
+    mandatoryFields.insert("role");
+  }
+
+  for (const auto& field : mandatoryFields) {
+    if (!config.contains(field)) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing engine field: " + field);
+    }
+  }
+
+  // component is used in the "ENFORCE" macros
+  std::string component = "engine";
+
+  for (auto& item : config.items()) {
+    if (item.key() == "version") {
+      JSON_ENFORCE_NUMERIC();
+      if (item.value().get<int>() != 1) {
+        throw Exception(GENIE_STATUS_ERROR_JSON_VALUE,
+                        "Invalid engine config: unsupported version: " + item.value().dump());
+      }
+    } else if (item.key() == "backend") {
+      JSON_ENFORCE_OBJECT();
+      validateBackendConfig(item.value());
+    } else if (item.key() == "model") {
+      JSON_ENFORCE_OBJECT();
+      validateModelConfig(item.value());
+    } else if (item.key() == "n-threads") {
+      JSON_ENFORCE_NUMERIC();
+#if defined(GENIE_SPD_FEATURE)
+    } else if (item.key() == "role" && dialogType == "spd") {
+      JSON_ENFORCE_STRING();
+      if (item.value() != "draft" && item.value() != "target") {
+        throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                        "Unknown value for engine config key: " + item.key());
+      }
+#endif
+    } else if (item.key() == "role" && dialogType == "kv-share") {
+      JSON_ENFORCE_STRING();
+      if (item.value() != "primary" && item.value() != "secondary") {
+        throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                        "Unknown value for engine config key: " + item.key());
+      }
+    } else {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown engine config key: " + item.key());
+    }
+  }
+}
+
+static void validateMultiEngineConfig(const qualla::json& configs, std::string dialogType) {
+  if (configs.is_object()) {
+    validateEngineConfig(configs, dialogType);
+#if defined(GENIE_SPD_FEATURE)
+    if (dialogType == "spd") {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "engine config for spd is not an array");
+    }
+#endif
+    if (dialogType == "kv-share") {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "engine config for kv-share is not an array");
+    }
+#if defined(GENIE_SPD_FEATURE)
+  } else if (configs.is_array() && dialogType == "spd") {
+    if (configs.size() != 2) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "engine config for spd contains an invalid number of engines");
+    }
+    bool engineRoleMask[2] = {false, false};
+    for (auto& item : configs) {
+      validateEngineConfig(item, dialogType);
+      if (item["role"] == "draft") {
+        engineRoleMask[0] = true;
+      } else if (item["role"] == "target") {
+        engineRoleMask[1] = true;
+      }
+    }
+    if (!engineRoleMask[0]) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "engine config for spd does not contain draft engine");
+    }
+    if (!engineRoleMask[1]) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "engine config for spd does not contain target engine");
+    }
+#endif
+  } else if (configs.is_array() && dialogType == "kv-share") {
+    if (configs.size() != 2) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "engine config for kv-share contains an invalid number of engines");
+    }
+    bool engineRoleMask[2] = {false, false};
+    for (auto& item : configs) {
+      validateEngineConfig(item, dialogType);
+      if (item["role"] == "primary") {
+        engineRoleMask[0] = true;
+      } else if (item["role"] == "secondary") {
+        engineRoleMask[1] = true;
+      }
+    }
+    if (!engineRoleMask[0]) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "engine config for kv-share does not contain primary");
+    }
+    if (!engineRoleMask[1]) {
+      throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA,
+                      "engine config for kv-share does not contain secondary");
+    }
+  } else {
+    throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "engine config is not an object or an array");
+  }
+}
+
+static void translateEngineConfig(const qualla::json& genieEngineConfig,
+                                  qualla::json& quallaEngineConfig) {
+  if (genieEngineConfig["version"] == 1) {
+    if (genieEngineConfig.contains("role")) {
+      quallaEngineConfig["role"] = genieEngineConfig["role"];
+    } else {
+      quallaEngineConfig["role"] = "primary";
+    }
+
+    quallaEngineConfig["n-threads"] = genieEngineConfig["n-threads"];
+
+    if (genieEngineConfig["backend"]["type"] == "QnnHtp") {
+      quallaEngineConfig["type"] = "qnn-htp";
+      quallaEngineConfig["backend-lib"] = getLibName("QnnHtp");
+      quallaEngineConfig["mmap-budget"] = genieEngineConfig["backend"]["QnnHtp"]["mmap-budget"];
+      quallaEngineConfig["use-mmap"] = genieEngineConfig["backend"]["QnnHtp"]["use-mmap"];
+      quallaEngineConfig["spill-fill-bufsize"] =
+          genieEngineConfig["backend"]["QnnHtp"]["spill-fill-bufsize"];
+      if (genieEngineConfig["backend"]["QnnHtp"].contains("pos-id-dim")) {
+        quallaEngineConfig["pos-id-dim"] = genieEngineConfig["backend"]["QnnHtp"]["pos-id-dim"];
+      }
+      quallaEngineConfig["cpumask"] = genieEngineConfig["backend"]["QnnHtp"]["cpu-mask"];
+      quallaEngineConfig["poll"] = genieEngineConfig["backend"]["QnnHtp"]["poll"];
+      quallaEngineConfig["kv-dim"] = genieEngineConfig["backend"]["QnnHtp"]["kv-dim"];
+      if (genieEngineConfig["backend"]["QnnHtp"].contains("rope-theta")) {
+        quallaEngineConfig["rope-theta"] = genieEngineConfig["backend"]["QnnHtp"]["rope-theta"];
+      }
+      if (genieEngineConfig["backend"]["QnnHtp"].contains("kv-update-method")) {
+        quallaEngineConfig["kv-update-method"] =
+            genieEngineConfig["backend"]["QnnHtp"]["kv-update-method"];
+      }
+      // By default, qualla takes the async init path. For now, we force
+      // async init off unless it is explicitly enabled in the Genie config.
+      // This is an HTP-specific feature.
+ quallaEngineConfig["use-async-Init"] = false; + if (genieEngineConfig["backend"]["QnnHtp"].contains("allow-async-init")) { + quallaEngineConfig["use-async-Init"] = + genieEngineConfig["backend"]["QnnHtp"]["allow-async-init"]; + } + } else if (genieEngineConfig["backend"]["type"] == "QnnGenAiTransformer") { + quallaEngineConfig["type"] = "qnn-cpu"; + quallaEngineConfig["backend-lib"] = getLibName("QnnGenAiTransformer"); + if (genieEngineConfig["backend"]["QnnGenAiTransformer"].contains("n-logits")) { + quallaEngineConfig["n_logits"] = + genieEngineConfig["backend"]["QnnGenAiTransformer"]["n-logits"]; + } + if (genieEngineConfig["backend"]["QnnGenAiTransformer"].contains("use-mmap")) { + quallaEngineConfig["use-mmap"] = + genieEngineConfig["backend"]["QnnGenAiTransformer"]["use-mmap"]; + } + if (genieEngineConfig["backend"]["QnnGenAiTransformer"].contains("n-layer")) { + quallaEngineConfig["n_layer"] = + genieEngineConfig["backend"]["QnnGenAiTransformer"]["n-layer"]; + } + if (genieEngineConfig["backend"]["QnnGenAiTransformer"].contains("n-embd")) { + quallaEngineConfig["n_embd"] = + genieEngineConfig["backend"]["QnnGenAiTransformer"]["n-embd"]; + } + if (genieEngineConfig["backend"]["QnnGenAiTransformer"].contains("n-heads")) { + quallaEngineConfig["n_heads"] = + genieEngineConfig["backend"]["QnnGenAiTransformer"]["n-heads"]; + } + } + + if (genieEngineConfig["backend"].contains("extensions")) { + quallaEngineConfig["backend-ext-conf"] = genieEngineConfig["backend"]["extensions"]; + } + + if (genieEngineConfig["model"]["type"] == "binary") { + quallaEngineConfig["model-list"] = genieEngineConfig["model"]["binary"]["ctx-bins"]; + if (genieEngineConfig["model"]["binary"].contains("lora")) { + quallaEngineConfig["lora-version"] = + static_cast(LORA_VERSION::GENIE_LORA_VERSION_V2); + if (genieEngineConfig["model"]["binary"]["lora"].contains("lora-version") && + genieEngineConfig["model"]["binary"]["lora"]["lora-version"] == 1) { + quallaEngineConfig["lora-version"] = + genieEngineConfig["model"]["binary"]["lora"]["lora-version"]; + } + for (int i = 0; i < genieEngineConfig["model"]["binary"]["lora"]["adapters"].size(); i++) { + quallaEngineConfig["lora"][i]["adapter-name"] = + genieEngineConfig["model"]["binary"]["lora"]["adapters"][i]["name"]; + quallaEngineConfig["lora"][i]["alpha-tensor-name"] = ""; + if (genieEngineConfig["model"]["binary"]["lora"].contains("alpha-tensor-name")) { + quallaEngineConfig["lora"][i]["alpha-tensor-name"] = + genieEngineConfig["model"]["binary"]["lora"]["alpha-tensor-name"]; + } + quallaEngineConfig["lora"][i]["alpha-tensor-value"] = 1.0f; + quallaEngineConfig["lora"][i]["binsection-basedir"] = ""; + if (genieEngineConfig["model"]["binary"]["lora"].contains("lora-version") && + genieEngineConfig["model"]["binary"]["lora"]["lora-version"] == 1) { + quallaEngineConfig["lora"][i]["path"] = + genieEngineConfig["model"]["binary"]["lora"]["adapters"][i]["path"]; + } else { + quallaEngineConfig["lora"][i]["bin-sections"] = + genieEngineConfig["model"]["binary"]["lora"]["adapters"][i]["bin-sections"]; + } + } + } + } else if (genieEngineConfig["model"]["type"] == "library") { + quallaEngineConfig["model"] = getLibName("QnnGenAiTransformerModel"); + quallaEngineConfig["model-bin-path"] = genieEngineConfig["model"]["library"]["model-bin"]; + quallaEngineConfig["op-package"] = + getLibName("QnnGenAiTransformerCpuOpPkg") + ":QnnOpPackage_interfaceProvider"; + } + if (genieEngineConfig["model"].contains("positional-encoding")) { + 
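+      // Illustrative shape of the Genie-side input handled by this branch,
+      // using only keys accepted by validatePositionalEncodingConfig()
+      // (values are examples, not defaults):
+      //   "positional-encoding": {
+      //     "type": "rope",
+      //     "rope-dim": 64,
+      //     "rope-theta": 10000.0,
+      //     "rope-scaling": { "rope-type": "llama3", "factor": 8.0 }
+      //   }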
quallaEngineConfig["positional-encoding"]["type"] = + genieEngineConfig["model"]["positional-encoding"]["type"]; + if (genieEngineConfig["model"]["positional-encoding"]["type"] == "rope") { + quallaEngineConfig["positional-encoding"]["rope-dim"] = + genieEngineConfig["model"]["positional-encoding"]["rope-dim"]; + if (genieEngineConfig["model"]["positional-encoding"].contains("rope-theta")) { + quallaEngineConfig["positional-encoding"]["rope-theta"] = + genieEngineConfig["model"]["positional-encoding"]["rope-theta"]; + } + if (genieEngineConfig["model"]["positional-encoding"].contains("rope-scaling")) { + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "rope-type")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"]["rope-type"] = + genieEngineConfig["model"]["positional-encoding"]["rope-scaling"]["rope-type"]; + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"]["rope-type"] == + "llama3") { + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "factor")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"]["factor"] = + genieEngineConfig["model"]["positional-encoding"]["rope-scaling"]["factor"]; + } + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "low-freq-factor")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"]["low-freq-factor"] = + genieEngineConfig["model"]["positional-encoding"]["rope-scaling"] + ["low-freq-factor"]; + } + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "high-freq-factor")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"]["high-freq-factor"] = + genieEngineConfig["model"]["positional-encoding"]["rope-scaling"] + ["high-freq-factor"]; + } + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "original-max-position-embeddings")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"] + ["original-max-position-embeddings"] = + genieEngineConfig["model"]["positional-encoding"] + ["rope-scaling"] + ["original-max-position-embeddings"]; + } + } + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"]["rope-type"] == + "longrope") { + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "factor")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"]["factor"] = + genieEngineConfig["model"]["positional-encoding"]["rope-scaling"]["factor"]; + } + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "short-factor")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"]["short-factor"] = + genieEngineConfig["model"]["positional-encoding"]["rope-scaling"] + ["short-factor"]; + } + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "long-factor")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"]["long-factor"] = + genieEngineConfig["model"]["positional-encoding"]["rope-scaling"] + ["long-factor"]; + } + if (genieEngineConfig["model"]["positional-encoding"]["rope-scaling"].contains( + "original-max-position-embeddings")) { + quallaEngineConfig["positional-encoding"]["rope-scaling"] + ["original-max-position-embeddings"] = + genieEngineConfig["model"]["positional-encoding"] + ["rope-scaling"] + ["original-max-position-embeddings"]; + } + } + } + } + } + } + } +} + +static void translateMultiEngineConfig(const qualla::json& genieConfig, + qualla::json& quallaConfig) { + if (genieConfig["dialog"]["engine"].is_array()) { 
+ quallaConfig["engine"] = qualla::json::array(); + for (auto& item : genieConfig["dialog"]["engine"]) { + qualla::json quallaEngineConfig; + translateEngineConfig(item, quallaEngineConfig); + quallaConfig["engine"].push_back(quallaEngineConfig); + } + } else { + translateEngineConfig(genieConfig["dialog"]["engine"], quallaConfig["engine"]); + } +} + +//============================================================================= +// Dialog::Config functions +//============================================================================= + +qnn::util::HandleManager Dialog::Config::s_manager; + +GenieDialogConfig_Handle_t Dialog::Config::add(std::shared_ptr config) { + return (GenieDialogConfig_Handle_t)s_manager.add(config); +} + +std::shared_ptr Dialog::Config::get(GenieDialogConfig_Handle_t handle) { + return s_manager.get((qnn::util::Handle_t)handle); +} + +void Dialog::Config::remove(GenieDialogConfig_Handle_t handle) { + s_manager.remove((qnn::util::Handle_t)handle); +} + +#if defined(GENIE_SSD_FEATURE) +static void validateDialogSsdConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "ssd-q1 config is not an object"); + } + + std::set mandatoryFields{"version", + "ssd-version", + "forecast-token-count", + "branches", + "forecast-prefix", + "forecast-prefix-name"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing ssd-q1 field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "ssd-q1"; + + int branchesSize = 0; + int forecastTokenCount = 0; + + int nStreams = 1; + float pThreshold = 0.0; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid ssd-q1 config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "ssd-version") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "forecast-token-count") { + JSON_ENFORCE_NUMERIC(); + forecastTokenCount = item.value(); + } else if (item.key() == "branches") { + JSON_ENFORCE_ARRAY(); + for (auto& elem : item.value()) { + if (!elem.is_number_integer()) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, "branches must be an array of integers"); + } + } + branchesSize = item.value().size(); + } else if (item.key() == "forecast-prefix") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "forecast-prefix-name") { + JSON_ENFORCE_STRING(); + } else if (item.key() == "n-streams") { + JSON_ENFORCE_NUMERIC(); + nStreams = item.value(); + } else if (item.key() == "p-threshold") { + JSON_ENFORCE_NUMERIC(); + pThreshold = item.value(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown ssd-q1 config key: " + item.key()); + } + } + + if ((pThreshold > 0.0) && (nStreams <= 1)) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "p-threshold can only be used with multistream (n-streams > 1)"); + } + + if (branchesSize > forecastTokenCount) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Size of branches array must be less than forecast-token-count"); + } +} +#endif + +#if defined(GENIE_LADE_FEATURE) +static void validateDialogLadeConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "lade config is not an object"); + } + + std::set mandatoryFields{"version", "update-mode", "window", "ngram", "gcap"}; + for (const 
auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing lade field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "lade"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid lade config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "update-mode") { + JSON_ENFORCE_STRING(); + std::string mode = item.value().get(); + if ((mode != "FWD_MAX_HIT") && (mode != "FWD_LEVEL") && (mode != "ALWAYS_FWD_ONE")) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid lade config: unsupported update-mode: " + item.value().dump()); + } + } else if (item.key() == "window") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "ngram") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "gcap") { + JSON_ENFORCE_NUMERIC(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown lade config key: " + item.key()); + } + } +} +#endif + +#if defined(GENIE_SPD_FEATURE) +static void validateDialogSpdConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "spd config is not an object"); + } + + std::set mandatoryFields{"version", "draft-len"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing spd field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "spd"; + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid spd config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "draft-len") { + JSON_ENFORCE_NUMERIC(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown spd config key: " + item.key()); + } + } +} +#endif + +#if defined(GENIE_MULTISTREAM_FEATURE) +static void validateDialogMultistreamConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "multistream config is not an object"); + } + + std::set mandatoryFields{"version", "n-streams"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing multistream field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "multistream"; + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid multistream config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "n-streams") { + JSON_ENFORCE_NUMERIC(); + } else if (item.key() == "p-threshold") { + JSON_ENFORCE_NUMERIC(); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Unknown multistream config key: " + item.key()); + } + } +} +#endif + +static void validateDialogConfig(const qualla::json& config) { + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Dialog config is not an object"); + } + + std::set mandatoryFields{"version", "type", "context", "tokenizer", "engine"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw 
Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing dialog field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "dialog"; + + std::string dialogType = "basic"; +#if defined(GENIE_SSD_FEATURE) + bool ssdq1 = false; + qualla::json ssdq1Config; +#endif +#if defined(GENIE_LADE_FEATURE) + bool lade = false; + qualla::json ladeConfig; +#endif +#if defined(GENIE_SPD_FEATURE) + bool spd = false; + qualla::json spdConfig; +#endif +#if defined(GENIE_MULTISTREAM_FEATURE) + bool multistream = false; + qualla::json multistreamConfig; +#endif + + for (auto& item : config.items()) { + if (item.key() == "version") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() != 1) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "Invalid dialog config: unsupported version: " + item.value().dump()); + } + } else if (item.key() == "type") { + JSON_ENFORCE_STRING(); + dialogType = item.value(); + if (dialogType == "basic" || dialogType == "kv-share") { + // Do nothing +#if defined(GENIE_SSD_FEATURE) + } else if (dialogType == "ssd-q1") { + ssdq1 = true; +#endif +#if defined(GENIE_LADE_FEATURE) + } else if (dialogType == "lade") { + lade = true; +#endif +#if defined(GENIE_SPD_FEATURE) + } else if (dialogType == "spd") { + spd = true; +#endif +#if defined(GENIE_MULTISTREAM_FEATURE) + } else if (dialogType == "multistream") { + multistream = true; +#endif + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, "Invalid dialog type: " + dialogType); + } +#if defined(GENIE_SSD_FEATURE) + } else if (item.key() == "ssd-q1") { + JSON_ENFORCE_OBJECT(); + ssdq1Config = item.value(); + // ssd-q1 validation is done below +#endif +#if defined(GENIE_LADE_FEATURE) + } else if (item.key() == "lade") { + JSON_ENFORCE_OBJECT(); + ladeConfig = item.value(); + // ssd-q1 validation is done below +#endif +#if defined(GENIE_SPD_FEATURE) + } else if (item.key() == "spd") { + JSON_ENFORCE_OBJECT(); + spdConfig = item.value(); + // spd validation is done below +#endif +#if defined(GENIE_MULTISTREAM_FEATURE) + } else if (item.key() == "multistream") { + JSON_ENFORCE_OBJECT(); + multistreamConfig = item.value(); + // multistream validation is done below +#endif + } else if (item.key() == "stop-sequence") { + JSON_ENFORCE_ARRAY(); + for (auto& elem : item.value()) { + if (!elem.is_string()) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "stop-sequence must be an array of strings"); + } + } + } else if (item.key() == "max-num-tokens") { + JSON_ENFORCE_NUMERIC(); + if (item.value().get() < 0) { + throw Exception(GENIE_STATUS_ERROR_JSON_VALUE, + "number of tokens must be > 0. provided: " + item.value().dump()); + } + } else if (item.key() == "context") { + JSON_ENFORCE_OBJECT(); + validateContextConfig(item.value()); + } else if (item.key() == "tokenizer") { + JSON_ENFORCE_OBJECT(); + validateTokenizerConfig(item.value()); + } else if (item.key() == "sampler") { + JSON_ENFORCE_OBJECT(); + validateSamplerConfig(item.value()); + } else if (item.key() == "engine") { + JSON_ENFORCE_ARRAY_OR_OBJECT(); + } else if (item.key() == "embedding") { + JSON_ENFORCE_OBJECT(); + validateEmbeddingConfig(item.value()); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown dialog config key: " + item.key()); + } + } + + // Engine Verification requires dialogType for engine roles. Since "type" is encounterd + // later than "engine" in loop. Therefore, moving engine validation out of the loop. 
+ validateMultiEngineConfig(config["engine"], dialogType); + +#if defined(GENIE_SSD_FEATURE) + if (ssdq1) { + if (!ssdq1Config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing ssd-q1 dialog config"); + } + validateDialogSsdConfig(ssdq1Config); + } else { + if (ssdq1Config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "ssd-q1 dialog config for incorrect dialog type: " + dialogType); + } + } +#endif +#if defined(GENIE_LADE_FEATURE) + if (lade) { + if (!ladeConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing lade dialog config"); + } + validateDialogLadeConfig(ladeConfig); + } else { + if (ladeConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "lade dialog config for incorrect dialog type: " + dialogType); + } + } +#endif +#if defined(GENIE_SPD_FEATURE) + if (spd) { + if (!spdConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing spd dialog config"); + } + validateDialogSpdConfig(spdConfig); + } else { + if (spdConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "spd dialog config for incorrect dialog type: " + dialogType); + } + } +#endif +#if defined(GENIE_MULTISTREAM_FEATURE) + if (multistream) { + if (!multistreamConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing multistream dialog config"); + } + validateDialogMultistreamConfig(multistreamConfig); + } else { + if (multistreamConfig.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "multistream dialog config for incorrect dialog type: " + dialogType); + } + } +#endif +} + +static void translateDialogConfig(const qualla::json& genieConfig, qualla::json& quallaConfig) { + if (genieConfig["dialog"]["version"] == 1) { + if (genieConfig["dialog"]["type"] == "lade") { + quallaConfig["type"] = "lhd-dec"; + } else if (genieConfig["dialog"]["type"] == "spd") { + quallaConfig["type"] = "spec-dec"; + } else if (genieConfig["dialog"]["type"] == "multistream") { + quallaConfig["type"] = "multistream"; + } else { + quallaConfig["type"] = genieConfig["dialog"]["type"]; + } +#if defined(GENIE_SSD_FEATURE) + if (genieConfig["dialog"]["type"] == "ssd-q1") { + quallaConfig["ssd-version"] = genieConfig["dialog"]["ssd-q1"]["ssd-version"]; + quallaConfig["forecast-token-count"] = + genieConfig["dialog"]["ssd-q1"]["forecast-token-count"]; + quallaConfig["branches"] = genieConfig["dialog"]["ssd-q1"]["branches"]; + quallaConfig["forecast-prefix"] = genieConfig["dialog"]["ssd-q1"]["forecast-prefix"]; + quallaConfig["forecast-prefix-name"] = + genieConfig["dialog"]["ssd-q1"]["forecast-prefix-name"]; + + if (genieConfig["dialog"]["ssd-q1"].contains("n-streams")) { + quallaConfig["n-streams"] = genieConfig["dialog"]["ssd-q1"]["n-streams"]; + } + if (genieConfig["dialog"]["ssd-q1"].contains("p-threshold")) { + quallaConfig["p-threshold"] = genieConfig["dialog"]["ssd-q1"]["p-threshold"]; + } + } +#endif +#if defined(GENIE_LADE_FEATURE) + if (genieConfig["dialog"]["type"] == "lade") { + quallaConfig["lhd-update-mode"] = genieConfig["dialog"]["lade"]["update-mode"]; + quallaConfig["window"] = genieConfig["dialog"]["lade"]["window"]; + quallaConfig["ngram"] = genieConfig["dialog"]["lade"]["ngram"]; + quallaConfig["gcap"] = genieConfig["dialog"]["lade"]["gcap"]; + } +#endif +#if defined(GENIE_SPD_FEATURE) + if (genieConfig["dialog"]["type"] == "spd") { + quallaConfig["draft-len"] = genieConfig["dialog"]["spd"]["draft-len"]; + } +#endif +#if defined(GENIE_MULTISTREAM_FEATURE) + if 
(genieConfig["dialog"]["type"] == "multistream") { + quallaConfig["n-streams"] = genieConfig["dialog"]["multistream"]["n-streams"]; + if (genieConfig["dialog"]["multistream"].contains("p-threshold")) { + quallaConfig["p-threshold"] = genieConfig["dialog"]["multistream"]["p-threshold"]; + } + } +#endif + } + if (genieConfig["dialog"].contains("stop-sequence")) { + quallaConfig["prompt"]["stop-sequence"] = genieConfig["dialog"]["stop-sequence"]; + } + + translateContextConfig(genieConfig, quallaConfig); + translateTokenizerConfig(genieConfig, quallaConfig); + translateSamplerConfig(genieConfig, quallaConfig); + translateMultiEngineConfig(genieConfig, quallaConfig); + translateEmbeddingConfig(genieConfig, quallaConfig); +} + +uint32_t getMaxNumTokens(const qualla::json& genieConfig) { + uint32_t tokenLimit{UINT32_MAX}; + if (genieConfig["dialog"]["version"] == 1) { + if (genieConfig["dialog"].contains("max-num-tokens")) { + tokenLimit = genieConfig["dialog"]["max-num-tokens"]; + } + } + return tokenLimit; +} + +Dialog::Config::Config(const char* configStr) { + qualla::json config; + rope_theta_set = false; + position_dim_set = false; + { + std::set keys; + + auto callback = [&keys](int depth, qualla::json::parse_event_t event, qualla::json& parsed) { + if ((depth == 1) && (event == qualla::json::parse_event_t::key)) { + if (keys.count(parsed) > 0) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, + "Multiple dialog config key: " + parsed.dump()); + } + keys.insert(parsed); + } + return true; + }; + + config = qualla::json::parse(configStr, callback); + } + + if (!config.is_object()) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Dialog config is not an object"); + } + + std::set mandatoryFields{"dialog"}; + for (const auto& field : mandatoryFields) { + if (!config.contains(field)) { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Missing dialog field: " + field); + } + } + + // component is used in the "ENFORCE" macros + std::string component = "dialog"; + + for (auto& item : config.items()) { + if (item.key() == "dialog") { + JSON_ENFORCE_OBJECT(); + validateDialogConfig(item.value()); + } else { + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, "Unknown dialog config key: " + item.key()); + } + } + m_config = config; +} + +qualla::json Dialog::Config::getJson() const { return m_config; } + +//============================================================================= +// Dialog functions +//============================================================================= + +qnn::util::HandleManager Dialog::s_manager; +std::atomic Dialog::s_nameCounter{0u}; + +GenieDialog_Handle_t Dialog::add(std::shared_ptr dialog) { + return (GenieDialog_Handle_t)s_manager.add(dialog); +} + +std::shared_ptr Dialog::get(GenieDialog_Handle_t handle) { + return s_manager.get((qnn::util::Handle_t)handle); +} + +void Dialog::remove(GenieDialog_Handle_t handle) { s_manager.remove((qnn::util::Handle_t)handle); } + +Dialog::Dialog(std::shared_ptr config) { + auto env = qualla::Env::create(qualla::json{}); + qualla::json quallaConfig; + translateDialogConfig(config->getJson(), quallaConfig); + m_tokenLimit = getMaxNumTokens(config->getJson()); + m_quallaDialog = qualla::Dialog::create( + env, "dialog" + std::to_string(s_nameCounter.fetch_add(1u)), quallaConfig); + if (!m_quallaDialog) { + throw Exception(GENIE_STATUS_ERROR_MEM_ALLOC, "Could not create a dialog object"); + } +} + +static_assert(qualla::Sentence::Code::COMPLETE == + static_cast(GENIE_DIALOG_SENTENCE_COMPLETE)); 
+static_assert(qualla::Sentence::Code::BEGIN == + static_cast(GENIE_DIALOG_SENTENCE_BEGIN)); +static_assert(qualla::Sentence::Code::CONTINUE == + static_cast(GENIE_DIALOG_SENTENCE_CONTINUE)); +static_assert(qualla::Sentence::Code::END == + static_cast(GENIE_DIALOG_SENTENCE_END)); +static_assert(qualla::Sentence::Code::ABORT == + static_cast(GENIE_DIALOG_SENTENCE_ABORT)); + +int32_t Dialog::query(const char* queryStr, + GenieDialog_SentenceCode_t sentenceCode, + GenieDialog_QueryCallback_t callback, + const void* userData) { + std::string query(queryStr); + uint32_t genTokenCount = 0u; + bool status = m_quallaDialog->query( + query, + static_cast(sentenceCode), + [&](const std::string& response, qualla::Sentence::Code code) { + callback(response.c_str(), static_cast(code), userData); + bool keepGoing = ++genTokenCount < m_tokenLimit; + if (!keepGoing && ((code == qualla::Sentence::Code::BEGIN) || + (code == qualla::Sentence::Code::CONTINUE))) { + callback("", GENIE_DIALOG_SENTENCE_END, userData); + } + return keepGoing; + }); + qualla::Dialog::KPIs kpis = m_quallaDialog->kpis(); + printf( + "\n\n[KPIS]:\nInit Time: %zu us\nPrompt Processing Time: %zu us, Prompt Processing Rate : " + "%f toks/sec\n" + "Token Generation Time: %zu us, Token Generation Rate: %f toks/sec\n", + kpis.init.total_usec, + kpis.prompt.last_usec, + kpis.tps.prompt, + kpis.generate.last_usec, + kpis.tps.generate); + return (status) ? (GENIE_STATUS_SUCCESS) : (GENIE_STATUS_ERROR_QUERY_FAILED); +} + +int32_t Dialog::save(const std::string& name) { + return m_quallaDialog->save(name) ? (GENIE_STATUS_SUCCESS) : (GENIE_STATUS_ERROR_QUERY_FAILED); +} + +int32_t Dialog::restore(const std::string& name) { + return m_quallaDialog->restore(name) ? (GENIE_STATUS_SUCCESS) : (GENIE_STATUS_ERROR_QUERY_FAILED); +} + +#if defined(GENIE_E2T_FEATURE) +int32_t Dialog::embeddingQuery(const void* embeddings, + const uint32_t embeddingsSize, + GenieDialog_SentenceCode_t sentenceCode, + GenieDialog_TokenToEmbeddingCallback_t t2eCallback, + GenieDialog_QueryCallback_t callback, + const void* userData) { + uint32_t genTokenCount = 0u; + + if (embeddingsSize % m_quallaDialog->getEmbeddingBufferSize() != 0) { + throw std::runtime_error( + "The embeddings buffer size must be an integer multiple of the embedding vector size in " + "bytes."); + } + + const uint8_t* embeddingsSrc = static_cast(embeddings); + std::vector embeddingVector(embeddingsSrc, embeddingsSrc + embeddingsSize); + + qualla::Dialog::T2ECallback t2eQuallaCallback{nullptr}; + if (t2eCallback) { + t2eQuallaCallback = [&](const int32_t token, void* embedding, const uint32_t embd_size) { + t2eCallback(token, embedding, embd_size, userData); + }; + } + + bool status = m_quallaDialog->query( + embeddingVector, + static_cast(sentenceCode), + t2eQuallaCallback, + [&](const std::string& response, qualla::Sentence::Code code) { + callback(response.c_str(), static_cast(code), userData); + bool keepGoing = ++genTokenCount < m_tokenLimit; + if (!keepGoing && ((code == qualla::Sentence::Code::BEGIN) || + (code == qualla::Sentence::Code::CONTINUE))) { + callback("", GENIE_DIALOG_SENTENCE_END, userData); + } + return keepGoing; + }); + qualla::Dialog::KPIs kpis = m_quallaDialog->kpis(); + printf( + "\n\n[KPIS]:\nInit Time: %zu us\nPrompt Processing Time: %zu us, Prompt Processing Rate : " + "%f toks/sec\n" + "Token Generation Time: %zu us, Token Generation Rate: %f toks/sec\n", + kpis.init.total_usec, + kpis.prompt.last_usec, + kpis.tps.prompt, + kpis.generate.last_usec, + kpis.tps.generate); + 
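+  // As in query() above, fold qualla's boolean result into a Genie status.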
return (status) ? (GENIE_STATUS_SUCCESS) : (GENIE_STATUS_ERROR_QUERY_FAILED); +} +#endif + +void Dialog::reset() { m_quallaDialog->reset(); } + +#if defined(GENIE_LORA_FEATURE) + +int32_t Dialog::applyLora(std::string loraAdapterName, std::string engine) { + bool status = m_quallaDialog->applyLoraAdapter(loraAdapterName, engine); + return (status) ? (GENIE_STATUS_SUCCESS) : (GENIE_STATUS_ERROR_GENERAL); +} + +int32_t Dialog::applyLoraStrength(std::string tensorName, std::string engine, float alpha) { + bool status = m_quallaDialog->applyLoraStrength(tensorName, alpha, engine); + return (status) ? (GENIE_STATUS_SUCCESS) : (GENIE_STATUS_ERROR_GENERAL); +} + +#endif + +int32_t Dialog::tokenQuery(const uint32_t* tokens, + const uint32_t sizeInputTokens, + GenieDialog_SentenceCode_t sentenceCode, + GenieDialog_TokenQueryCallback_t callback, + const void* userData) { + std::vector inputTokens; + for (size_t i = 0; i < sizeInputTokens; i++) { + inputTokens.push_back(tokens[i]); + } + uint32_t genTokenCount = 0u; + dialogCallback.setCallBackType(qualla::QUALLA_CALLBACK_TYPE_TOKEN); + dialogCallback.getTokenCbFunc() = std::make_shared< + std::function>(); + *(dialogCallback.getTokenCbFunc()) = [&](const int32_t* responseTokens, + const uint32_t sizeResponseTokens, + qualla::Sentence::Code code) { + callback((const uint32_t*)responseTokens, + sizeResponseTokens, + static_cast(code), + userData); + bool keepGoing = ++genTokenCount < m_tokenLimit; + if (!keepGoing && + ((code == qualla::Sentence::Code::BEGIN) || (code == qualla::Sentence::Code::CONTINUE))) { + callback(nullptr, 0, GENIE_DIALOG_SENTENCE_END, userData); + } + return keepGoing; + }; + bool status = m_quallaDialog->query((const std::vector)inputTokens, + static_cast(sentenceCode), + dialogCallback); + qualla::Dialog::KPIs kpis = m_quallaDialog->kpis(); + printf( + "\n\n[KPIS]:\nInit Time: %zu us\nPrompt Processing Time: %zu us, Prompt Processing Rate : " + "%f toks/sec\n" + "Token Generation Time: %zu us, Token Generation Rate: %f toks/sec\n", + kpis.init.total_usec, + kpis.prompt.last_usec, + kpis.tps.prompt, + kpis.generate.last_usec, + kpis.tps.generate); + return (status) ? (GENIE_STATUS_SUCCESS) : (GENIE_STATUS_ERROR_QUERY_FAILED); +} \ No newline at end of file diff --git a/Genie/Genie/src/Dialog.hpp b/Genie/Genie/src/Dialog.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c62690c358732c8159dbc365b0c5ccda2e083de7 --- /dev/null +++ b/Genie/Genie/src/Dialog.hpp @@ -0,0 +1,95 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#pragma once + +#include +#include + +#include "GenieDialog.h" +#include "Util/HandleManager.hpp" +#include "qualla/dialog.hpp" +#include "qualla/DialogCallback.hpp" + +namespace genie { + +enum LORA_VERSION : uint8_t { + GENIE_LORA_VERSION_V1 = 0x1, + GENIE_LORA_VERSION_V2 = 0x2, + GENIE_LORA_VERSION_UNDEFINED = 0xFF +}; + +class Dialog { + public: + class Config { + public: + static GenieDialogConfig_Handle_t add(std::shared_ptr config); + static std::shared_ptr get(GenieDialogConfig_Handle_t handle); + static void remove(GenieDialogConfig_Handle_t handle); + + Config(const char* configStr); + qualla::json getJson() const; + + private: + static qnn::util::HandleManager s_manager; + qualla::json m_config; + }; + + static GenieDialog_Handle_t add(std::shared_ptr dialog); + static std::shared_ptr get(GenieDialog_Handle_t handle); + static void remove(GenieDialog_Handle_t handle); + + qualla::DialogCallback dialogCallback; + + Dialog(std::shared_ptr config); + + Dialog(const Dialog&) = delete; + Dialog& operator=(const Dialog&) = delete; + Dialog(Dialog&&) = delete; + Dialog& operator=(Dialog&&) = delete; + + int32_t query(const char* queryStr, + GenieDialog_SentenceCode_t sentenceCode, + GenieDialog_QueryCallback_t callback, + const void* userData); + + int32_t save(const std::string&); + + int32_t restore(const std::string&); + +#if defined(GENIE_E2T_FEATURE) + int32_t embeddingQuery(const void* embeddings, + const uint32_t embeddingsSize, + GenieDialog_SentenceCode_t sentenceCode, + GenieDialog_TokenToEmbeddingCallback_t t2eCallback, + GenieDialog_QueryCallback_t callback, + const void* userData); +#endif + + + + int32_t tokenQuery(const uint32_t* tokens, + const uint32_t sizeInputTokens, + GenieDialog_SentenceCode_t sentenceCode, + GenieDialog_TokenQueryCallback_t callback, + const void* userData); + + void reset(); + +#if defined(GENIE_LORA_FEATURE) + int32_t applyLora(std::string loraAdapterName, std::string engine); + int32_t applyLoraStrength(std::string tensorName, std::string engine, float alpha); +#endif + + private: + std::unique_ptr m_quallaDialog; + uint32_t m_tokenLimit{UINT32_MAX}; + static qnn::util::HandleManager s_manager; + static std::atomic s_nameCounter; +}; +} // namespace genie diff --git a/Genie/Genie/src/Exception.hpp b/Genie/Genie/src/Exception.hpp new file mode 100644 index 0000000000000000000000000000000000000000..956c935caecb25696b823d093dee0ee9b8e85405 --- /dev/null +++ b/Genie/Genie/src/Exception.hpp @@ -0,0 +1,27 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#pragma once + +#include +#include + +#include "GenieCommon.h" + +namespace genie { + +class Exception : public std::runtime_error { + public: + Exception(Genie_Status_t status, std::string what) : std::runtime_error(what), m_status(status) {} + Genie_Status_t status() const { return m_status; } + + private: + Genie_Status_t m_status; +}; + +} // namespace genie diff --git a/Genie/Genie/src/GenieCommon.cpp b/Genie/Genie/src/GenieCommon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ed4f084cf83b154d7add8a143301ed12090e2e0f --- /dev/null +++ b/Genie/Genie/src/GenieCommon.cpp @@ -0,0 +1,15 @@ +//============================================================================= +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================= + +#include "GenieCommon.h" + +uint32_t Genie_getApiMajorVersion(void) { return GENIE_API_VERSION_MAJOR; } + +uint32_t Genie_getApiMinorVersion(void) { return GENIE_API_VERSION_MINOR; } + +uint32_t Genie_getApiPatchVersion(void) { return GENIE_API_VERSION_PATCH; } diff --git a/Genie/Genie/src/GenieDialog.cpp b/Genie/Genie/src/GenieDialog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f6f3116de4a0261b15aed2194cfc17b8b3bcda8 --- /dev/null +++ b/Genie/Genie/src/GenieDialog.cpp @@ -0,0 +1,249 @@ +//============================================================================= +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================= + +#include "Dialog.hpp" +#include "Exception.hpp" +#include "GenieDialog.h" +#include "Macro.hpp" +#include "Util/HandleManager.hpp" +#include "qualla/detail/json.hpp" + +using namespace genie; + +GENIE_API +Genie_Status_t GenieDialogConfig_createFromJson(const char* str, + GenieDialogConfig_Handle_t* configHandle) { + try { + GENIE_ENSURE(str, GENIE_STATUS_ERROR_INVALID_ARGUMENT); + GENIE_ENSURE(configHandle, GENIE_STATUS_ERROR_INVALID_ARGUMENT); + auto config = std::make_shared(str); + GENIE_ENSURE(config, GENIE_STATUS_ERROR_MEM_ALLOC); + *configHandle = genie::Dialog::Config::add(config); + } catch (const qualla::json::parse_error& e) { + std::cerr << e.what() << std::endl; + return GENIE_STATUS_ERROR_JSON_FORMAT; + } catch (const Exception& e) { + std::cerr << e.what() << std::endl; + return e.status(); + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + return GENIE_STATUS_ERROR_GENERAL; + } + return GENIE_STATUS_SUCCESS; +} + +GENIE_API +Genie_Status_t GenieDialogConfig_free(const GenieDialogConfig_Handle_t configHandle) { + try { + GENIE_ENSURE(configHandle, GENIE_STATUS_ERROR_INVALID_HANDLE); + { + // Check if the dialog actually exists + auto configObj = genie::Dialog::Config::get(configHandle); + GENIE_ENSURE(configObj, GENIE_STATUS_ERROR_INVALID_HANDLE); + } + genie::Dialog::Config::remove(configHandle); + } catch (const std::exception& e) { + return GENIE_STATUS_ERROR_GENERAL; + } + return GENIE_STATUS_SUCCESS; +} + +GENIE_API +Genie_Status_t GenieDialog_create(const GenieDialogConfig_Handle_t configHandle, + GenieDialog_Handle_t* dialogHandle) { + try { + GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_ARGUMENT); + + // Get config object + auto configObj = genie::Dialog::Config::get(configHandle); + GENIE_ENSURE(configObj, GENIE_STATUS_ERROR_INVALID_HANDLE); + + // Create dialog + auto dialog = std::make_shared(configObj); + GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_MEM_ALLOC); + + // Create Handle + *dialogHandle = genie::Dialog::add(dialog); + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + return GENIE_STATUS_ERROR_GENERAL; + } + + // Return SUCCESS + return GENIE_STATUS_SUCCESS; +} + +GENIE_API +Genie_Status_t GenieDialog_query(const GenieDialog_Handle_t dialogHandle, + const char* queryStr, + const GenieDialog_SentenceCode_t sentenceCode, + const GenieDialog_QueryCallback_t callback, + const void* userData) { + int32_t status; + + try { + GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE); + auto dialog = genie::Dialog::get(dialogHandle); + GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE); + GENIE_ENSURE(queryStr, GENIE_STATUS_ERROR_INVALID_ARGUMENT); + GENIE_ENSURE(callback, GENIE_STATUS_ERROR_INVALID_ARGUMENT); + + switch (sentenceCode) { + case GENIE_DIALOG_SENTENCE_COMPLETE: + case GENIE_DIALOG_SENTENCE_BEGIN: + case GENIE_DIALOG_SENTENCE_CONTINUE: + case GENIE_DIALOG_SENTENCE_END: + case GENIE_DIALOG_SENTENCE_ABORT: + // Do nothing + break; + default: + return GENIE_STATUS_ERROR_INVALID_ARGUMENT; + } + + status = dialog->query(queryStr, sentenceCode, callback, userData); + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + return GENIE_STATUS_ERROR_GENERAL; + } + + return status; +} + +GENIE_API +Genie_Status_t GenieDialog_save(const GenieDialog_Handle_t dialogHandle, const char* path) { + int32_t status; + + try { + GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE); + 
auto dialog = genie::Dialog::get(dialogHandle);
+    GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    GENIE_ENSURE(path, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    status = dialog->save(path);
+  } catch (const std::exception& e) {
+    std::cerr << e.what() << std::endl;
+    return GENIE_STATUS_ERROR_GENERAL;
+  }
+
+  return status;
+}
+
+GENIE_API
+Genie_Status_t GenieDialog_restore(const GenieDialog_Handle_t dialogHandle, const char* path) {
+  int32_t status;
+
+  try {
+    GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    auto dialog = genie::Dialog::get(dialogHandle);
+    GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    GENIE_ENSURE(path, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    status = dialog->restore(path);
+  } catch (const std::exception& e) {
+    std::cerr << e.what() << std::endl;
+    return GENIE_STATUS_ERROR_GENERAL;
+  }
+
+  return status;
+}
+
+GENIE_API
+Genie_Status_t GenieDialog_reset(const GenieDialog_Handle_t dialogHandle) {
+  try {
+    GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    auto dialog = genie::Dialog::get(dialogHandle);
+    GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    dialog->reset();
+  } catch (const std::exception& e) {
+    return GENIE_STATUS_ERROR_GENERAL;
+  }
+  return GENIE_STATUS_SUCCESS;
+}
+
+#if defined(GENIE_LORA_FEATURE)
+
+GENIE_API
+Genie_Status_t GenieDialog_applyLora(const GenieDialog_Handle_t dialogHandle,
+                                     const char* engine,
+                                     const char* loraAdapterName) {
+  int32_t status;
+  try {
+    GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    auto dialog = genie::Dialog::get(dialogHandle);
+    GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    GENIE_ENSURE(engine, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    std::string eng(engine);
+    GENIE_ENSURE(loraAdapterName, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    std::string loraName(loraAdapterName);
+    status = dialog->applyLora(loraName, eng);
+  } catch (const std::exception& e) {
+    return GENIE_STATUS_ERROR_GENERAL;
+  }
+  return status;
+}
+
+GENIE_API
+Genie_Status_t GenieDialog_setLoraStrength(const GenieDialog_Handle_t dialogHandle,
+                                           const char* engine,
+                                           const char* tensorName,
+                                           const float alpha) {
+  int32_t status;
+  try {
+    GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    auto dialog = genie::Dialog::get(dialogHandle);
+    GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    GENIE_ENSURE(engine, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    std::string eng(engine);
+    GENIE_ENSURE(tensorName, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    std::string alphaTensorName(tensorName);
+    GENIE_ENSURE_NOT_EMPTY(alphaTensorName, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    status = dialog->applyLoraStrength(alphaTensorName, eng, alpha);
+  } catch (const std::exception& e) {
+    return GENIE_STATUS_ERROR_GENERAL;
+  }
+  return status;
+}
+
+#endif
+
+GENIE_API
+Genie_Status_t GenieDialog_tokenQuery(const GenieDialog_Handle_t dialogHandle,
+                                      const uint32_t* inputTokens,
+                                      const uint32_t numTokens,
+                                      const GenieDialog_SentenceCode_t sentenceCode,
+                                      const GenieDialog_TokenQueryCallback_t callback,
+                                      const void* userData) {
+  // int32_t (not bool) so error codes from tokenQuery() are not truncated.
+  int32_t status;
+  try {
+    GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    auto dialog = genie::Dialog::get(dialogHandle);
+    GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE);
+    GENIE_ENSURE(inputTokens, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    GENIE_ENSURE(callback, GENIE_STATUS_ERROR_INVALID_ARGUMENT);
+    status = dialog->tokenQuery(inputTokens, numTokens, sentenceCode, callback, userData);
+  } catch (const std::exception& e)
{ + std::cerr << e.what() << std::endl; + return GENIE_STATUS_ERROR_GENERAL; + } + + return status; +} + +GENIE_API +Genie_Status_t GenieDialog_free(const GenieDialog_Handle_t dialogHandle) { + try { + GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE); + { + // Check if the dialog actually exists + auto dialog = genie::Dialog::get(dialogHandle); + GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE); + } + genie::Dialog::remove(dialogHandle); + } catch (const std::exception& e) { + return GENIE_STATUS_ERROR_GENERAL; + } + return GENIE_STATUS_SUCCESS; +} diff --git a/Genie/Genie/src/GenieDialogEmbedding.cpp b/Genie/Genie/src/GenieDialogEmbedding.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e5162cb8e4b57b9523b2e57e3568d46ea261f8e2 --- /dev/null +++ b/Genie/Genie/src/GenieDialogEmbedding.cpp @@ -0,0 +1,41 @@ +//============================================================================= +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================= + +#include "Dialog.hpp" +#include "Exception.hpp" +#include "GenieDialog.h" +#include "Macro.hpp" +#include "Util/HandleManager.hpp" +#include "qualla/detail/json.hpp" + +using namespace genie; + +GENIE_API +Genie_Status_t GenieDialog_embeddingQuery(const GenieDialog_Handle_t dialogHandle, + const void* embeddings, + const uint32_t embeddingsSize, + const GenieDialog_SentenceCode_t sentenceCode, + const GenieDialog_TokenToEmbeddingCallback_t t2eCallback, + const GenieDialog_QueryCallback_t callback, + const void* userData) { + Genie_Status_t status; + try { + GENIE_ENSURE(dialogHandle, GENIE_STATUS_ERROR_INVALID_HANDLE); + auto dialog = genie::Dialog::get(dialogHandle); + GENIE_ENSURE(dialog, GENIE_STATUS_ERROR_INVALID_HANDLE); + GENIE_ENSURE(embeddings, GENIE_STATUS_ERROR_INVALID_ARGUMENT); + GENIE_ENSURE(callback, GENIE_STATUS_ERROR_INVALID_ARGUMENT); + status = dialog->embeddingQuery( + embeddings, embeddingsSize, sentenceCode, t2eCallback, callback, userData); + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + return GENIE_STATUS_ERROR_GENERAL; + } + + return status; +} diff --git a/Genie/Genie/src/Macro.hpp b/Genie/Genie/src/Macro.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c50b1585e4cee424b3b744415b4d74233d7a8c31 --- /dev/null +++ b/Genie/Genie/src/Macro.hpp @@ -0,0 +1,101 @@ +//============================================================================ +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================= + +#pragma once + +//====================================================================================================================== +// Error generation macros +//====================================================================================================================== + +#define GENIE_LOG_ERROR(fmt, ...) 
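+// GENIE_LOG_ERROR expands to nothing here, so the GENIE_ENSURE* macros below
+// only affect control flow; their message arguments become visible once this
+// stub is wired to a real logger.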
+ +#define GENIE_ENSURE_MSG(value, return_error, msg) \ + do { \ + if (!(value)) { \ + GENIE_LOG_ERROR(" " msg); \ + return return_error; \ + } \ + } while (0) + +#define GENIE_ENSURE(value, return_error) \ + do { \ + if (!(value)) { \ + GENIE_LOG_ERROR("%s was not true.", #value); \ + return return_error; \ + } \ + } while (0) + +#define GENIE_ENSURE_STATUS(status, return_error) \ + do { \ + if ((status) != GENIE_SUCCESS) { \ + return return_error; \ + } \ + } while (0) + +#define GENIE_ENSURE_EQ(a, b, return_error) \ + do { \ + if ((a) != (b)) { \ + GENIE_LOG_ERROR("%s != %s (%d != %d)", #a, #b, (a), (b)); \ + return return_error; \ + } \ + } while (0) + +#define GENIE_ENSURE_NOT_EMPTY(value, return_error) \ + do { \ + if (value.empty()) { \ + GENIE_LOG_ERROR("%s was not true.", #value); \ + return return_error; \ + } \ + } while (0) +//====================================================================================================================== +// JSON config macros +//====================================================================================================================== + +#define JSON_ENFORCE_OBJECT() \ + if (!item.value().is_object()) { \ + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, \ + "Invalid " + component + " config: " + item.key() + " is not an object"); \ + } + +#define JSON_ENFORCE_ARRAY() \ + if (!item.value().is_array()) { \ + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, \ + "Invalid " + component + " config: " + item.key() + " is not an array"); \ + } + +#define JSON_ENFORCE_ARRAY_OR_OBJECT() \ + if (!item.value().is_array() && !item.value().is_object()) { \ + throw Exception( \ + GENIE_STATUS_ERROR_JSON_SCHEMA, \ + "Invalid " + component + " config: " + item.key() + " is not an array or object"); \ + } + +#define JSON_ENFORCE_NUMERIC() \ + if (!item.value().is_number()) { \ + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, \ + "Invalid " + component + " config: " + item.key() + " is not numeric"); \ + } + +#define JSON_ENFORCE_ARRAY_OR_NUMERIC() \ + if (!item.value().is_number() && !item.value().is_array()) { \ + throw Exception( \ + GENIE_STATUS_ERROR_JSON_SCHEMA, \ + "Invalid " + component + " config: " + item.key() + " is not an array or numeric"); \ + } + +#define JSON_ENFORCE_BOOLEAN() \ + if (!item.value().is_boolean()) { \ + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, \ + "Invalid " + component + " config: " + item.key() + " is not boolean"); \ + } + +#define JSON_ENFORCE_STRING() \ + if (!item.value().is_string()) { \ + throw Exception(GENIE_STATUS_ERROR_JSON_SCHEMA, \ + "Invalid " + component + " config: " + item.key() + " is not a string"); \ + } diff --git a/Genie/Genie/src/Util/HandleGenerator.hpp b/Genie/Genie/src/Util/HandleGenerator.hpp new file mode 100644 index 0000000000000000000000000000000000000000..09ed32c97015727885b8418441ffb36efe5893c1 --- /dev/null +++ b/Genie/Genie/src/Util/HandleGenerator.hpp @@ -0,0 +1,62 @@ +//============================================================================== +// +// Copyright (c) 2019-2020,2023 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#pragma once + +#include + +namespace qnn { +namespace util { + +typedef std::size_t Handle_t; + +class HandleGenerator final { + static_assert(std::is_integral::value, "Handle must be an integral type"); + static_assert((sizeof(Handle_t) == 8) || (sizeof(Handle_t) == 4), + "Implementation of HandleGenerator::bswap() for sizeof(std::size_t) is required"); + + public: + HandleGenerator(const HandleGenerator&) = delete; + HandleGenerator& operator=(const HandleGenerator&) = delete; + HandleGenerator(HandleGenerator&&) = delete; + HandleGenerator& operator=(HandleGenerator&&) = delete; + + static Handle_t generate(const void* const addr) { + return (bswap((Handle_t)addr) ^ (Handle_t)s_operand); + } + static const void* reverse(const Handle_t handle) { + return (void*)bswap(handle ^ (Handle_t)s_operand); + } + static constexpr Handle_t invalid() { return s_operand; } + + private: + HandleGenerator() {} + + static uint32_t bswap32(const uint32_t val) { + return (val >> 24U) | ((val >> 8U) & 0xff00U) | ((val << 8U) & 0xff0000U) | (val << 24U); + } + + static uint64_t bswap64(const uint64_t val) { + return ((bswap32(val) + 0ULL) << 32U) | bswap32(val >> 32U); + } + + template + static size_t bswap(T val) { + if (sizeof(T) == 4) { + return bswap32(val); + } else { + return bswap64(val); + } + } + + // Magic number generated via "openssl rand -hex 8" + static constexpr Handle_t s_operand = (Handle_t)0xd4c2416534bcdc9b; +}; + +} // namespace util +} // namespace qnn diff --git a/Genie/Genie/src/Util/HandleManager.hpp b/Genie/Genie/src/Util/HandleManager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..375d4dcc01c7e49ba9969b0d2244aef7f5221f1c --- /dev/null +++ b/Genie/Genie/src/Util/HandleManager.hpp @@ -0,0 +1,84 @@ +//============================================================================== +// +// Copyright (c) 2019-2020 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#pragma once + +#include +#include +#include +#include +#include + +#include "HandleGenerator.hpp" + +namespace qnn { +namespace util { + +template +class HandleManager { + public: + HandleManager() = default; + HandleManager(const HandleManager&) = delete; + HandleManager& operator=(const HandleManager&) = delete; + HandleManager(HandleManager&&) = delete; + HandleManager& operator=(HandleManager&&) = delete; + + Handle_t add(std::shared_ptr item) { + std::lock_guard locker(m_itemsMtx); + + if (!item) { + return HandleGenerator::invalid(); + } + + auto handle = HandleGenerator::generate(item.get()); + m_items[handle] = item; + return handle; + } + + Handle_t add(T* item) { return add(std::shared_ptr(item)); } + + Handle_t add(std::weak_ptr item) { return add(item.lock()); } + + std::shared_ptr get(Handle_t handle) { + std::lock_guard locker(m_itemsMtx); + + auto it = m_items.find(handle); + if (it == m_items.end()) { + return std::shared_ptr(nullptr); + } + + return it->second; + } + + typedef std::function>&)> UnaryPredicate_t; + + Handle_t findIf(UnaryPredicate_t pred) const { + auto it = std::find_if(m_items.begin(), m_items.end(), pred); + if (it == m_items.end()) { + return HandleGenerator::invalid(); + } + + return it->first; + } + + size_t remove(Handle_t handle) { + std::lock_guard locker(m_itemsMtx); + return m_items.erase(handle); + } + + void clear() { m_items.clear(); } + + const std::unordered_map>& getItems() const { return m_items; } + + private: + std::unordered_map> m_items; + std::mutex m_itemsMtx; +}; + +} // namespace util +} // namespace qnn diff --git a/Genie/Genie/src/qualla/context.cpp b/Genie/Genie/src/qualla/context.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9a71ce9c1754ea9bdee044d01fab6a99ef79c543 --- /dev/null +++ b/Genie/Genie/src/qualla/context.cpp @@ -0,0 +1,118 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include + +#include +#include + +namespace qualla { + +Context::Context(Env& env, const std::string& name, const qualla::json& json) + : _name(name), _env(env), _conf(json) { + _env.logger().debug(fmt::format("ctx-new: {} config {}", _name, _conf.dump())); + + qualla::Config conf(json, "context:"); + _size = conf.optional("size", 1024); + _size = conf.optional("n-ctx", _size); // alternative name + _n_vocab = conf.optional("n-vocab", 32000); + _n_embd = conf.optional("n-embd", 1024); + _embedding_length = conf.optional("embedding-length", -1); + _embedding_datatype = conf.optional("embedding-datatype", "float32"); + // For backward compatibility. 
When eot-token is removed, this logic can be simplified + // Currently, EOT is marked as default truncating token if available + int32_t eot_tok = conf.optional("eot-token", -1); + if (eot_tok >= 0) _eos_tok_list.insert(eot_tok); + + const qualla::json eos_conf = conf.optional("eos-token", _eos_tok); + if (eos_conf.is_array() && eos_conf.size() > 0) { + const std::vector& eos_tokens = eos_conf.get>(); + _eos_tok = eos_tokens[0]; + for (const int32_t& eos_tok : eos_tokens) + _eos_tok_list.insert(eos_tok); + } else if (eos_conf.is_number_integer()) { + int32_t eos_tok = eos_conf.get(); + _eos_tok = (eot_tok >= 0) ? eot_tok : eos_tok; + _eos_tok_list.insert(eos_tok); + } + + _pad_tok = conf.optional("pad-token", _eos_tok); +} + +std::unique_ptr Context::create( + Env& env, + const std::string& name, + const qualla::json& conf +) { + return std::make_unique(env, name, conf); +} + +std::unique_ptr Context::create( + Env& env, + const std::string& name, + std::istream& json_stream +) { + return create(env, name, json::parse(json_stream)); +} + +std::unique_ptr Context::create( + Env& env, + const std::string& name, + const std::string& json_str +) { + return create(env, name, json::parse(json_str)); +} + +#ifdef QUALLA_STATIC + +// This is a hack to make sure all core bits are linked in for the static build + +extern void needFileLogger(); +extern void needStdoutLogger(); +extern void needBasicSampler(); +extern void needBasicDialog(); +extern void needKvShareDialog(); +extern void needSpdDialog(); +extern void needSsdDialog(); +extern void needLadeDialog(); +extern void needMultistreamDialog(); + + #ifdef QUALLA_ENGINE_QNN_HTP +extern void needQnnHtpEngine(); + #endif + + #ifdef QUALLA_ENGINE_QNN_CPU +extern void needQnnCpuEngine(); + #endif + +static OnLoad needs([]() { + needStdoutLogger(); + needFileLogger(); + needBasicDialog(); + needBasicSampler(); + needKvShareDialog(); + needSpdDialog(); + needSsdDialog(); + needLadeDialog(); + needMultistreamDialog(); + + #ifdef QUALLA_ENGINE_QNN_HTP + needQnnHtpEngine(); + #endif + + #ifdef QUALLA_ENGINE_QNN_CPU + needQnnCpuEngine(); + #endif +}); + +#endif + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/dialog.cpp b/Genie/Genie/src/qualla/dialog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ef51c2094fa07125430ad7eb87382ba11a27f3ca --- /dev/null +++ b/Genie/Genie/src/qualla/dialog.cpp @@ -0,0 +1,590 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All rights reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define __INFO(__fmt, ...) _env->logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env->logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env->logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env->logger().post(Logger::KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env->logger().post(Logger::DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) 
\ + _env->logger().post(Logger::TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace fs = std::filesystem; + +namespace qualla { + +Dialog::Dialog(std::shared_ptr env, const std::string& name, const qualla::json& json) + : _env(env) { + Timer start; + + + + __DEBUG("dialog-new: {} config {}", name, json.dump()); + + using qc = qualla::Config; + + // Create Gpiomarker and reset the gpio status to low + const qualla::json& gpio_conf = qc::optional(json, "gpio", {}); + _gpio_marker = GpioMarker::create(gpio_conf); + + _gpio_marker->set(); + + // Create the context first + _ctx = Context::create(*_env, name, qc::mandatory(json, "context")); + + // Parse prompt config + const qualla::json& pmt_conf = qc::optional(json, "prompt", {}); + _prompt_type = qc::optional(pmt_conf, "type", "llama2"); + _sys_tags = qc::optional>(pmt_conf, "sys-tags", {"", ""}); + _inst_tags = qc::optional>(pmt_conf, "inst-tags", {"", ""}); + _role_tags = qc::optional>(pmt_conf, "role-tags", {"", ""}); + _sys_prompt = qc::optional(pmt_conf, "sys-prompt", ""); + + const std::vector& stop_sequence = + qc::optional>(pmt_conf, "stop-sequence", {}); + _stop_sequence = SequenceMatchTrie(stop_sequence); + + // Create Tokenizer + // TODO: auto-detect / validate n_vocab with tokenizer vocab + fs::path tok_path = _env->path().models / qc::mandatory(json, "tokenizer"); + _tokenizer = Tokenizer::create(*_ctx, tok_path); + + // Create Sampler(s) + auto add_sampler = [&](const qualla::json& j) { + std::string role = qc::optional(j, "role", "primary"); + _sampler[role] = Sampler::create(*_ctx, j); + }; + + const qualla::json& sam_conf = qc::mandatory(json, "sampler"); + if (sam_conf.is_array()) { + for (auto sc : sam_conf) { + add_sampler(sc); + } + } else + add_sampler(sam_conf); + + + + + // Create Engine(s) + auto add_engine = [&](const qualla::json& j) { + std::string role = qc::optional(j, "role", "primary"); + + _engine[role] = Engine::create(*_ctx, j); + + using FF = Engine::Feature::Flags; + + + if (!_engine[role]->supports(FF::OUTPUT_LOGITS)) + throw std::runtime_error("the engine must output Logits"); + }; + + + + const qualla::json& eng_conf = qc::mandatory(json, "engine"); + + + if (eng_conf.is_array()) { + + for (auto ec : eng_conf) { + add_engine(ec); + } + } else{ + add_engine(eng_conf); + + } + + // Store input type (token, embedding, etc) from the engine. + // This assumes multi-engine usecases use matching input types. 
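+  // (e.g. token-input engines for both the primary and draft roles in a
+  // speculative-decoding setup; that pairing is only an illustration.)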
+ m_inputType = _engine.begin()->second->getInputType(); + + _kpis.init.update(start.elapsed_usec()); +} + +Dialog::~Dialog() {} + +static bool __no_response_query(const std::string&, Sentence::Code) { + return false; +} + +static bool __no_response_token(const int32_t*, const uint32_t, Sentence::Code) { + return false; +} + +static bool __no_response(const std::string&, Sentence::Code) { + return false; +} + +void Dialog::getTopK(std::vector& logits, std::vector>& tokens, size_t topK, float pThreshold, Dialog::Callback callback) { + + auto& sampler = *_sampler["primary"]; + + // Sample top-k logits but with a minimum probability threshold +#if defined(__GNUC__) && !defined(__clang__) + std::span indexed_logits_span(logits); + IndexedLogits indexed_logits(indexed_logits_span, sampler.rng()); +#else + IndexedLogits indexed_logits(std::span{logits.data(),logits.size()}, sampler.rng()); +#endif + indexed_logits.softmax(); + indexed_logits.topK(topK); + + for (int i = 0; i < topK; i++) { + + _last_tok = indexed_logits.indices[i]; + + // Only sample tokens above some probability threshold + // TODO: Modify sampling algorithm as necessary + if (indexed_logits.probs[i] < pThreshold) { + break; + } else if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::CONTINUE); + } else { + tokens.push_back({_last_tok}); + } + } +} + +bool Dialog::query(const std::string& str, Sentence::Code scode, Dialog::Callback callback) { + std::vector p_vec; // prompt tokens + std::string p_str; // prompt string + + p_vec.reserve(1024); + + if (scode == Sentence::COMPLETE || scode == Sentence::BEGIN) { + // Reset prompt/gen counts for new query + _n_prompt = 0; + _n_generated = 0; + _n_previous_prompt = 0; + _n_previous_generated = 0; + + + if (_last_tok >= 0 && !_ctx->is_eos(_last_tok)) p_vec.push_back(_last_tok); + + p_str = _inst_tags[0]; + + if (!_n_queries) { + // First query. Prepend sys-prompt. + p_str += _sys_tags[0] + _sys_prompt + _sys_tags[1]; + } else { + // Add EOS explicitly if the last query was aborted prematurely. 
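+      // (Only emitted when the model defines an EOS token; eos_tok() < 0 means none.)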
+ if (_ctx->eos_tok() >= 0) p_vec.push_back(_ctx->eos_tok()); + } + + // Add BOS + if (_ctx->bos_tok() >= 0) { + p_vec.push_back(_ctx->bos_tok()); + } + } + + // FIXME: make this more generic + if (_prompt_type == "llama3") { + p_str += _sys_tags[0] + _role_tags[1] + _sys_tags[1] + str + _inst_tags[2]; + } else { + p_str += str; + } + + if (scode == Sentence::COMPLETE || scode == Sentence::END) { + if (_prompt_type == "llama3") { + p_str += _sys_tags[0] + _role_tags[2] + _sys_tags[1]; + } else { + p_str += _inst_tags[1]; + } + } + + _env->logger().post(Logger::DEBUG, [&]() { + qualla::json j{{"string", str}, {"prompt", p_str}}; + return fmt::format("dialog-query: {} {}", _ctx->name(), j.dump()); + }); + + _n_queries++; + + _tokenizer->encode(p_str, p_vec); + + __DEBUG("dialog-tokens: {} {}", _ctx->name(), p_vec); + __DEBUG("dialog-text: \"{}\"", p_str); + + if (scode == Sentence::COMPLETE || scode == Sentence::END) { + // Detect stop sequences here + if (!_stop_sequence.empty()) { + _stop_sequence.reset(); + return process(p_vec, [&](const std::string& str, Sentence::Code c) { + // Check for stop sequence and end inference when stop sequence is found + if (_stop_sequence.process_next_string(str)) { + callback(str, c); // Emit sequences until match is complete + return false; + } + + // Else, return normal callback function + return callback(str, c); + }); + } + + return process(p_vec, callback); + } + + return process(p_vec, __no_response); +} + +bool Dialog::query(const std::vector& input, Sentence::Code scode, qualla::DialogCallback& callback) { + std::vector p_vec; // prompt tokens + p_vec.reserve(1024); + + if (scode == Sentence::COMPLETE || scode == Sentence::BEGIN) { + // Reset prompt/gen counts for new query + _n_prompt = 0; + _n_generated = 0; + _n_previous_prompt = 0; + _n_previous_generated = 0; + + if (_last_tok >= 0) + p_vec.push_back(_last_tok); + + // Add EOS explicitly if the last query was aborted prematurely. + if (_n_queries && _last_tok != _ctx->eos_tok()) { + p_vec.push_back(_ctx->eos_tok()); + } + // Add BOS + if (_ctx->bos_tok() >= 0) { + p_vec.push_back(_ctx->bos_tok()); + } + } + + p_vec.insert(p_vec.end(), input.begin(), input.end()); + __DEBUG("dialog-tokens: {} {}", _ctx->name(), p_vec); + + _n_queries++; + + if (scode == Sentence::COMPLETE || scode == Sentence::END) { + return process(p_vec, callback); + } + + DialogCallback callback_return_token(QUALLA_CALLBACK_TYPE_TOKEN); + *(callback_return_token.getTokenCbFunc()) = __no_response_token; + return process(p_vec, callback_return_token); +} + +bool Dialog::query( + std::vector& embedding_vectors, + Sentence::Code scode, + T2ECallback t2eCallback, + Dialog::Callback callback +) { + _n_queries++; + if (scode == Sentence::COMPLETE || scode == Sentence::END) { + return process(embedding_vectors, t2eCallback, callback); + } + // Only process, no output + return process(embedding_vectors, t2eCallback, [&](const std::string&, Sentence::Code) { + return false; + }); +} + +bool Dialog::prime(const std::string& str) { + bool r = query(str, Sentence::COMPLETE, __no_response); + + // End with EOS as we want the primer to be self-contained + _last_tok = _ctx->eos_tok(); + + return r; +} + +bool Dialog::save(const std::string& o_name) { + Timer start; + + // Save using session name unless override is provided + std::string name = o_name.empty() ? 
_ctx->name() : o_name; + fs::path save_path = name; + + if (!_n_past) { + __ERROR("dialog-save: {} : nothing to save yet", name); + return false; + } + + __INFO("dialog-save: saving as {} {}", name, save_path.string()); + + if (!fs::exists(save_path) && !fs::create_directories(save_path)) { + __ERROR("dialog-save: {} : failed to create cache directory", name); + return false; + } + + // Save Dialog state + qualla::json j{ + {"n-past", _n_past}, + {"n-prompt", _n_prompt}, + {"n-generated", _n_generated}, + {"n-queries", _n_queries}, + {"last-tok", _last_tok} + }; + { + fs::path p = save_path / "dialog.json"; + std::ofstream f(p); + f << j; + } + + // Save Engines (mandatory) + for (auto& e : _engine) { + if (!e.second->save(name)) { + __ERROR("dialog-save: {} : unable to save {} engine", name, e.first); + return false; + } + } + + // Save Samplers (optional) + for (auto& s : _sampler) { + if (!s.second->save(name)) { + __WARN("dialog-save: {} : unable to save {} sampler", name, s.first); + } + } + + _kpis.save.update(start.elapsed_usec()); + + return true; +} + +bool Dialog::restore(const std::string& o_name) { + Timer start; + + // Restore using session name unless override is provided + std::string name = o_name.empty() ? _ctx->name() : o_name; + fs::path restore_path = name; + + __INFO("dialog-restore: restoring from {} {}", name, restore_path.string()); + + // Try to restore the Dialog state (optional) + // If this fails we reset everything and try to restore the engine. + qualla::json j{}; + { + fs::path p = restore_path / "dialog.json"; + if (fs::exists(p)) { + std::ifstream f(p); + j = qualla::json::parse(f); + } else { + __DEBUG("dialog-restore: {} : internal state not restored", name); + } + } + + using qc = qualla::Config; + _n_past = qc::optional(j, "n-past", 0); + _n_prompt = qc::optional(j, "n-prompt", 0); + _n_generated = qc::optional(j, "n-generated", 0); + _n_queries = qc::optional(j, "n-queries", 1); + _last_tok = qc::optional(j, "last-tok", _ctx->eos_tok()); + + // Restore Engines (mandatory) + for (auto& e : _engine) { + uint32_t n = e.second->restore(name); + if (!n) { + __ERROR("dialog-restore: {} : unable to restore {} engine", name, e.first); + return false; + } + + // Restore n_past from the engine state + if (_n_past && n != _n_past) { + __WARN("dialog-restore: {} : n-past mismatch : {} engine {} intern {}", + name, + e.first, + _n_past, + n); + // Keep the smaller number + _n_past = std::min(n, _n_past); + } else + _n_past = n; + } + + // Restore Samplers (optional) + for (auto& s : _sampler) { + if (!s.second->restore(name)) { + __WARN("dialog-restore: {} : unable to restore {} sampler", name, s.first); + } + } + + _kpis.reset(); + _kpis.restore.update(start.elapsed_usec()); + + return true; +} + +void Dialog::reset() { + __INFO("dialog-reset: {}", _ctx->name()); + + _n_past = 0; + _n_prompt = 0; + _n_generated = 0; + _n_queries = 0; + _last_tok = -1; + _n_previous_prompt = 0; + _n_previous_generated = 0; + + _kpis.reset(); + + // Reset Engines and Samplers + for (auto& e : _engine) + e.second->reset(); + for (auto& s : _sampler) + s.second->reset(); + + State::clear(); +} + +// Dialog KPIs helpers + +// Get latest KPIs +Dialog::KPIs& Dialog::kpis() { + // Update TPS + if (_n_prompt) { + float t = _kpis.prompt.last_usec / _n_prompt; + _kpis.tps.n_prompt = _n_prompt; + _kpis.tps.prompt = 1000000.0 / (t ? 
t : 1000000.0);
+ }
+
+ if (_n_generated) {
+ float t = _kpis.generate.last_usec / _n_generated;
+ _kpis.tps.n_generate = _n_generated;
+ _kpis.tps.generate = 1000000.0 / (t ? t : 1000000.0);
+ }
+
+ // We could synthesize more KPIs from other layers (engine, sampler, etc)
+ return _kpis;
+}
+
+std::string Dialog::KPIs::dump(std::string_view sep) const {
+ return fmt::format(
+ "init:[{}]{}prompt:[{}]{}generate:[{}]{}save:[{}]{}restore:[{}]{} tps-prompt:{:.2f} tps-generate:{:.2f}",
+ init.dump(),
+ sep,
+ prompt.dump(),
+ sep,
+ generate.dump(),
+ sep,
+ save.dump(),
+ sep,
+ restore.dump(),
+ sep,
+ tps.prompt,
+ tps.generate
+ );
+}
+
+void Dialog::KPIs::reset() {
+ init.reset();
+ prompt.reset();
+ generate.reset();
+ save.reset();
+ restore.reset();
+ tps.prompt = 0.0;
+ tps.generate = 0.0;
+}
+
+// Create API
+
+// Dialog registry : type string + creator function
+using Registry = std::unordered_map<std::string, Dialog::Creator>;
+static std::unique_ptr<Registry> registry;
+
+void Dialog::__register(const std::string& type, Creator func) {
+ if (!registry) registry = std::make_unique<Registry>();
+
+ Registry& r = *registry;
+
+ r[type] = func;
+}
+
+std::unique_ptr<Dialog> Dialog::create(
+ std::shared_ptr<Env> env,
+ const std::string& name,
+ const qualla::json& conf
+) {
+
+ using qc = qualla::Config;
+ std::string type = qc::optional(conf, "type", "basic");
+
+ if (!registry) throw std::runtime_error(type + ": dialog not found");
+
+ Registry& r = *registry;
+
+ if (!r.contains(type)) throw std::runtime_error(type + ": dialog not found");
+
+ return std::unique_ptr<Dialog>(r[type](env, name, conf));
+}
+
+std::unique_ptr<Dialog> Dialog::create(
+ std::shared_ptr<Env> env,
+ const std::string& name,
+ std::istream& json_stream
+) {
+
+ return create(env, name, json::parse(json_stream));
+}
+
+std::unique_ptr<Dialog> Dialog::create(
+ std::shared_ptr<Env> env,
+ const std::string& name,
+ const fs::path& json_path
+) {
+
+ if (!fs::exists(json_path))
+ throw std::runtime_error(json_path.string() + ": file does not exist");
+ std::ifstream ifs(json_path);
+ return create(env, name, ifs);
+}
+
+std::vector<std::string> Dialog::list() {
+ std::vector<std::string> v;
+ if (!registry) return v;
+
+ Registry& r = *registry;
+
+ for (const auto& k : r)
+ v.push_back(k.first);
+ if (!r.contains("basic")) v.push_back("basic"); // default type, always available
+ return v;
+}
+
+bool Dialog::applyLoraAdapter(std::string lora_adapter_name, std::string engine_role) {
+ auto& engine = *_engine[engine_role];
+ if (!engine.applyLoraAdapter(lora_adapter_name)) {
+ __WARN("dialog-applyLoraAdapter: failed for {}", lora_adapter_name);
+ return false;
+ }
+ return true;
+}
+
+bool Dialog::applyLoraStrength(std::string tensor_name, float tensor_val, std::string engine_role) {
+ auto& engine = *_engine[engine_role];
+ if (!engine.applyLoraStrength(tensor_name, tensor_val)) {
+ __WARN("dialog-applyLoraStrength: failed for {}", tensor_name);
+ return false;
+ }
+ return true;
+}
+
+} // namespace qualla
diff --git a/Genie/Genie/src/qualla/dialogs/basic.cpp b/Genie/Genie/src/qualla/dialogs/basic.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..43e02cfd22d4b9e482a2c31801c710ab2365e8da
--- /dev/null
+++ b/Genie/Genie/src/qualla/dialogs/basic.cpp
@@ -0,0 +1,421 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All rights reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+// +//============================================================================== + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env->logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env->logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env->logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env->logger().post(Logger::KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env->logger().post(Logger::DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) \ + _env->logger().post(Logger::TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +BasicDialog::BasicDialog(std::shared_ptr env, const std::string& name, const json& conf) : Dialog(env, name, conf) { + if (!_engine.contains("primary")) { + State::fatal("\"primary\" engine not present in config!"); + return; + } +} + +bool BasicDialog::processFollowOnGeneration(std::vector& tokens, std::vector& logits, Dialog::Callback callback){ + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + while (true) { + if (State::canceled()) { + callback("", Sentence::END); + break; + } + // This condition is valid for both tokens and embedding + if (_n_past + 1 > _ctx->size()) { + __WARN("Context limit exceeded ({} + 1 > {})", _n_past, _ctx->size()); + callback("", Sentence::END); + break; + } + if (m_inputType == InputType::TOKENS) { + if (!engine.process(tokens, logits)) + return Dialog::abort("engine processing failed", callback); + } else if(m_inputType == InputType::EMBEDDINGS) { + // Convert tokens to embedding for the processing in the engine. + auto embedBufSize = engine.getEmbeddingBufferSize(); + std::vector embedding; + for(auto &token: tokens){ + std::vector curTokenEmbedding(embedBufSize,0); + m_t2eCallback(token, curTokenEmbedding.data(), embedBufSize); + embedding.insert(embedding.end(), curTokenEmbedding.begin(), curTokenEmbedding.end()); + } + if (!engine.process(embedding, {}, logits)) + return Dialog::abort("engine processing failed", callback); + } + else{ + return Dialog::abort("No valid Input Type is used", callback); + } + tokens[0] = _last_tok = sampler.process(logits); + + _n_past++; + _n_generated++; + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + break; + } + + if (!callback(_tokenizer->decode(tokens), Sentence::CONTINUE)) break; + } + + return true; +} + +bool BasicDialog::process(std::vector& tokens, Dialog::Callback callback) { + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + if(m_inputType != InputType::TOKENS) { + __ERROR("Input type for model is not tokens."); + return false; + } + + _gpio_marker->set(); + + // Vector for storing logits. + // Allocated & filled by the engine. 
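+ // (Flattened layout: the engine typically fills one n_vocab-sized row per
+ // scored position, so a single row for an ordinary decode step.)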
+ std::vector logits; + + State::clear(); + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + using FF = Engine::Feature::Flags; + if (engine.supports(FF::DYNAMIC_LOAD)) engine.load(); + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(tokens, logits, false)) + return Dialog::abort("engine prompt processing failed", callback); + + _n_prompt += tokens.size(); + _n_past += tokens.size(); + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + tokens[0] = _last_tok = sampler.process(logits); + tokens.resize(1); + + _n_generated++; + + _gpio_marker->set(); + + _kpis.prompt.update(start.elapsed_usec()); + + // Log latest KPIs + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + start.reset(); + + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + return true; + } + + if (!callback(_tokenizer->decode(tokens), Sentence::BEGIN)) return true; + + State::busy(true); + + processFollowOnGeneration(tokens, logits, callback); + + State::busy(false); + + _gpio_marker->set(); + _gpio_marker->reset(); + + _kpis.generate.update(start.elapsed_usec()); + + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + return !State::failed(); +} + +bool BasicDialog::processFollowOnGeneration(std::vector& tokens, std::vector& logits, qualla::DialogCallback callback){ + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + while (true) { + if (State::canceled()) { + callback.callBack(nullptr, 0, Sentence::END, tokenizer()); + break; + } + // This condition is valid for both tokens and embedding + if (_n_past + 1 > _ctx->size()) { + __WARN("Context limit exceeded ({} + 1 > {})", _n_past, _ctx->size()); + callback.callBack(nullptr, 0, Sentence::END, tokenizer()); + break; + } + if (m_inputType == InputType::TOKENS) { + if (!engine.process(tokens, logits)) + return Dialog::abort("engine processing failed", callback); + } else if(m_inputType == InputType::EMBEDDINGS) { + // Convert tokens to embedding for the processing in the engine. + auto embedBufSize = engine.getEmbeddingBufferSize(); + std::vector embedding; + for(auto &token: tokens){ + std::vector curTokenEmbedding(embedBufSize,0); + m_t2eCallback(token, curTokenEmbedding.data(), embedBufSize); + embedding.insert(embedding.end(), curTokenEmbedding.begin(), curTokenEmbedding.end()); + } + if (!engine.process(embedding, {}, logits)) + return Dialog::abort("engine processing failed", callback); + } + else{ + return Dialog::abort("No valid Input Type is used", callback); + } + tokens[0] = _last_tok = sampler.process(logits); + + _n_past++; + _n_generated++; + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + if (_ctx->is_eos(_last_tok)) { + callback.callBack(nullptr, 0, Sentence::END, tokenizer()); + break; + } + + if (!callback.callBack(tokens.data(), tokens.size(), Sentence::CONTINUE, tokenizer())) break; + } + + return true; +} + +bool BasicDialog::process(std::vector& tokens, qualla::DialogCallback callback) { + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + if(m_inputType != InputType::TOKENS) { + __ERROR("Input type for model is not tokens."); + return false; + } + + _gpio_marker->set(); + + // Vector for storing logits. 
+ // Allocated & filled by the engine. + std::vector logits; + + State::clear(); + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + using FF = Engine::Feature::Flags; + if (engine.supports(FF::DYNAMIC_LOAD)) engine.load(); + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback.callBack(nullptr, 0, Sentence::END, tokenizer()); + return true; + } + + if (!engine.process(tokens, logits, false)) { + return Dialog::abort("engine prompt processing failed", callback); + } + + _n_prompt += tokens.size(); + _n_past += tokens.size(); + + if (!engine.updateKV(_n_past)) { + return Dialog::abort("context size exceeded", callback); + } + + tokens[0] = _last_tok = sampler.process(logits); + tokens.resize(1); + + _n_generated++; + + _gpio_marker->set(); + + _kpis.prompt.update(start.elapsed_usec()); + + // Log latest KPIs + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + start.reset(); + + if (_ctx->is_eos(_last_tok)) { + callback.callBack(nullptr, 0, Sentence::END, tokenizer()); + return true; + } + + if (!callback.callBack(tokens.data(), tokens.size(), Sentence::BEGIN, tokenizer())) + return true; + + State::busy(true); + processFollowOnGeneration(tokens, logits, callback); + State::busy(false); + + _gpio_marker->set(); + _gpio_marker->reset(); + + _kpis.generate.update(start.elapsed_usec()); + + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + return !State::failed(); +} + +bool BasicDialog::process( + std::vector& embedding_vectors, + T2ECallback t2eCallback, + Dialog::Callback callback +) { + Timer start; + + if(m_inputType != InputType::EMBEDDINGS) { + __ERROR("Input type for model is not embeddings."); + return false; + } + + // Vector for storing logits. + // Allocated & filled by the engine. + std::vector logits; + + State::clear(); + + _gpio_marker->set(); + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + // Store the t2e callback for reference during follow-on generation. + m_t2eCallback = t2eCallback; + + size_t embedBufSize = engine.getEmbeddingBufferSize(); + + { + std::vector eosEmbedding(embedBufSize, 0.0); + if (m_t2eCallback) { + m_t2eCallback(_ctx->eos(), eosEmbedding.data(), embedBufSize); + } + // For non-autogenerative usecases (where t2eCallback is not supplied), + // the EOS vector is all zero. This is fine for models with proper + // attention masking support, but may degrade accuracy otherwise. 
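+ // When a t2eCallback is supplied, the EOS row is produced by the same
+ // token-to-embedding mapping used for generated tokens, so the engine sees
+ // a consistent representation of end-of-sequence in embedding space.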
+ if (!engine.cacheEosEmbedding(eosEmbedding)) { + __DEBUG("Failed to set the eos token embedding."); + return false; + } + } + + using FF = Engine::Feature::Flags; + if (engine.supports(FF::DYNAMIC_LOAD)) engine.load(); + + size_t curTokenCount = embedding_vectors.size() / embedBufSize; + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + start.reset(); // Don't include preprocessing time + + if (_n_past + curTokenCount > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, curTokenCount, _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(embedding_vectors, {}, logits)) + return Dialog::abort("engine prompt processing failed", callback); + _n_prompt += curTokenCount; + _n_past += curTokenCount; + + std::vector tokens(1, 0); + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + tokens[0] = _last_tok = sampler.process(logits); + + _n_generated++; + + _gpio_marker->set(); + + _kpis.prompt.update(start.elapsed_usec()); + + // Log latest KPIs + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + start.reset(); + + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + return true; + } + + if (!callback(_tokenizer->decode(tokens), Sentence::BEGIN)) { + return true; + } + + if (!m_t2eCallback) { + callback("", Sentence::END); + return true; + } + + State::busy(true); + processFollowOnGeneration(tokens, logits, callback); + State::busy(false); + + _gpio_marker->set(); + _gpio_marker->reset(); + + _kpis.generate.update(start.elapsed_usec()); + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + return !State::failed(); +} + +// Registrator instance +static OnLoad regy([]() { + Dialog::__register( + "basic", + [](std::shared_ptr env, const std::string& name, const json& conf) { + return (Dialog*)new BasicDialog(env, name, conf); + } + ); +}); + +void needBasicDialog() {} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/dialogs/kv-share.cpp b/Genie/Genie/src/qualla/dialogs/kv-share.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0afef88e66638fe74864c54c171d464f3b39f637 --- /dev/null +++ b/Genie/Genie/src/qualla/dialogs/kv-share.cpp @@ -0,0 +1,359 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All rights reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env->logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env->logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env->logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env->logger().post(Logger::KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env->logger().post(Logger::DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) 
\ + _env->logger().post(Logger::TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + + using qc = qualla::Config; + + class KvShareDialog : public Dialog { + public: + KvShareDialog(std::shared_ptr env, const std::string& name, const json& conf) + : Dialog(env, name, conf) {} + + virtual bool process(std::vector& tokens, Dialog::Callback callback) override; + + virtual bool process(std::vector& tokens, DialogCallback callback) override { + return false; + } + + virtual void reset() override; + + bool convertKV(const fs::path& cache_dir); + + }; + + void KvShareDialog::reset() { + __INFO("dialog-reset: {}", _ctx->name()); + + _n_past = 0; + _n_prompt = 0; + _n_generated = 0; + _n_queries = 0; + _last_tok = -1; + + _kpis.reset(); + + // Reset Samplers + for (auto& s : _sampler) + s.second->reset(); + + // Reset Engines + for (auto& e : _engine) { + e.second->reset(); + e.second->unload(); + } + + State::clear(); + } + + bool KvShareDialog::process(std::vector& tokens, Dialog::Callback callback) { + + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + // Vector for storing logits. + // Allocated & filled by the engine. + std::vector logits; + + State::clear(); + + auto& sampler = *_sampler["primary"]; + + auto& p_engine = *_engine["primary"]; // prompt + auto& s_engine = *_engine["secondary"]; // generation + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!p_engine.process(tokens, logits)) + return Dialog::abort("engine prompt processing failed", callback); + + _n_prompt += tokens.size(); + _n_past += tokens.size(); + + if (!p_engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + tokens[0] = _last_tok = sampler.process(logits); + tokens.resize(1); + + _n_generated++; + + _kpis.prompt.update(start.elapsed_usec()); + // Log latest KPIs + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + return true; + } + + if (!callback(_tokenizer->decode(tokens), Sentence::BEGIN)) return true; + + __DEBUG("dialog: {} : switching engines", _ctx->name()); + { + // Setup cache dir for saving the engine state + std::string cache_name = _ctx->name() + "-kv-share"; + fs::path cache_dir = _env->path().cache / cache_name; + + if (!fs::exists(cache_dir) && !fs::create_directories(cache_dir)) { + __ERROR("dialog: {} : failed to create cache directory {}", + _ctx->name(), + cache_dir.string()); + return Dialog::abort("engine switch failed", callback); + } + + // Save and unload the primary engine + p_engine.save(cache_name); + p_engine.unload(); + + // The purpose is to save the hyperparams + s_engine.save(cache_name); + + convertKV(cache_dir); + + size_t n = s_engine.restore(cache_name); + + if(!fs::remove_all(cache_dir)) { + __WARN("dialog: {} : cache files not closed/dir not found", _ctx->name()); + } + + if (n != _n_past) { + __WARN("dialog: {} : kv size mismatch {} expected {}", _ctx->name(), n, _n_past); + _n_past = n; + } + + s_engine.updateKV(_n_past); + } + + start.reset(); + + State::busy(true); + + while (true) { + if (State::canceled()) { + callback("", Sentence::END); + break; + } + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + break; + } + if 
(!s_engine.process(tokens, logits)) + return Dialog::abort("secondary engine processing failed", callback); + + tokens[0] = _last_tok = sampler.process(logits); + + _n_past++; + _n_generated++; + + if (!s_engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + break; + } + + if (!callback(_tokenizer->decode(tokens), Sentence::CONTINUE)) break; + } + + State::busy(false); + + _kpis.generate.update(start.elapsed_usec()); + + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + return true; + } + + bool KvShareDialog::convertKV(const fs::path& cache_dir) { + Timer start; + + fs::path nsp_cache_path = cache_dir / "kv-cache.primary.qnn-htp"; + fs::path cpu_cache_path = cache_dir / "kv-cache.secondary.qnn-cpu"; + + __DEBUG("kv-convert: begin converting {} to ", nsp_cache_path.string(), cpu_cache_path.string()); + + std::ifstream nsp_fs(nsp_cache_path, std::ios::in | std::ios::binary); + + if (nsp_fs.fail()) { + __ERROR("kv-convert: error reading file {}", nsp_cache_path.string()); + State::error("failed to read primary kv-cache"); + return false; + } + + // Read spec from nsp file + CacheFileSpec nsp_spec; + nsp_fs.read((char*)&nsp_spec, sizeof(nsp_spec)); + if (nsp_spec.magic != 0xC0DE) { + __ERROR("kv-convert: expected 0xC0DE found {:#x}", nsp_spec.magic); + State::error("invalid format of primary kv-cache"); + return false; + } + + // clang-format off + __DEBUG("kv-convert: load {{ num_tensors {}, magic {}, dtype {}, n_heads {}, embed_dim {} update_size {} }}", + nsp_spec.num_tensors, nsp_spec.magic, int(nsp_spec.dtype), nsp_spec.n_heads, nsp_spec.embed_dim, nsp_spec.update_size); + // clang-format on + + std::fstream cpu_fs(cpu_cache_path, std::ios::in | std::ios::out | std::ios::binary); + + if (cpu_fs.fail()) { + // TODO: replace with proper error handling + __ERROR("kv-convert: failed to write {}", cpu_cache_path.string()); + State::error("failed to save secondary kv-cache"); + return false; + } + + CacheFileSpec cpu_spec; + cpu_fs.read((char*)&cpu_spec, sizeof(cpu_spec)); + if (cpu_spec.magic != 0xC0DE) { + __ERROR("kv-convert: expected 0xC0DE found {:#x}", cpu_spec.magic); + State::error("invalid format of secondary kv-cache"); + return false; + } + + // Set the n_tokens processed during prompt processing and the spec write to file + cpu_spec.update_size = nsp_spec.update_size; + cpu_fs.seekp(std::ios::beg); + cpu_fs.write((char*)&cpu_spec, sizeof(cpu_spec)); + + const uint32_t n_layer = nsp_spec.num_tensors / 2; + const uint32_t n_head = nsp_spec.n_heads; + const uint32_t kv_dim = nsp_spec.embed_dim; + const uint32_t n_tok = nsp_spec.update_size; + + const size_t cache_size = n_layer * n_head * kv_dim * n_tok; + + // Read Key/Value Cache + std::vector key_cache(cache_size); + std::vector value_cache(cache_size); + nsp_fs.read((char*)key_cache.data(), cache_size); + nsp_fs.read((char*)value_cache.data(), cache_size); + + // Read Quantization parameters + std::vector key_scales(n_layer); + std::vector value_scales(n_layer); + nsp_fs.read((char*)key_scales.data(), n_layer * sizeof(double)); + nsp_fs.read((char*)value_scales.data(), n_layer * sizeof(double)); + + nsp_fs.close(); + + // Convert and write on cpu_file + // Dequant and transpose caches + const uint32_t layer_size = n_head * kv_dim * n_tok; + const uint32_t head_size = kv_dim * n_tok; + + // Transpose kvdim * n_tok (QNN-HTP K$) -> n_tok * kvdim (QNN-CPU K$) + // For ScopGPT KV$ Format + 
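+ // Dequantization sketch (assuming symmetric uint8 quantization with a
+ // zero-point of 128): dequant(x) = (float(x) - 128) * scale[layer], which
+ // is the (cache[loc] - 128) * scales[i] math in the loops below.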
__DEBUG("kv-convert: dequantizing keys"); + std::vector dequant_keys(cache_size); + for (uint32_t i = 0; i < n_layer; i++) { + for (uint32_t j = 0; j < n_head; j++) { + for (uint32_t k = 0; k < kv_dim; k++) { + for (uint32_t l = 0; l < n_tok; l++) { + // Interleave K$ + // QNN HTP: [0 2 4 ... 126 1 3 5 ... 127] + // QNN CPU: [0 1 2 ... 63 64 65 ... 127] + const uint32_t interleaved_k = + (2 * k < kv_dim) ? 2 * k : 2 * (k - kv_dim / 2) + 1; + + const uint32_t read_loc = i * layer_size + j * head_size + k * n_tok + l; + const uint32_t write_loc = i * layer_size + j * head_size + l * kv_dim + interleaved_k; + + dequant_keys[write_loc] = + (static_cast(key_cache[read_loc]) - 128) * key_scales[i]; + } + } + } + } + + __DEBUG("kv-convert: dequantizing values"); + std::vector dequant_values(cache_size); + for (uint32_t i = 0; i < n_layer; i++) { + for (uint32_t j = 0; j < n_head; j++) { + for (uint32_t l = 0; l < n_tok; l++) { + for (uint32_t k = 0; k < kv_dim; k++) { + const uint32_t read_loc = i * layer_size + j * head_size + l * kv_dim + k; + const uint32_t write_loc = read_loc; + + dequant_values[write_loc] = + (static_cast(value_cache[read_loc]) - 128) * value_scales[i]; + } + } + } + } + + __DEBUG("kv-convert: storing converted KV to file"); + cpu_fs.write((char *)dequant_keys.data(), dequant_keys.size() * sizeof(float)); + cpu_fs.write((char *)dequant_values.data(), dequant_values.size() * sizeof(float)); + + cpu_fs.flush(); + cpu_fs.close(); + + __DEBUG("kv-convert: done converting {} to {} in {} usec", + nsp_cache_path.string(), + cpu_cache_path.string(), + start.elapsed_usec()); + + return true; + + } + +// Registrator instance + static OnLoad regy([]() { + Dialog::__register( + "kv-share", + [](std::shared_ptr env, const std::string& name, const json& conf) { + return (Dialog*)new KvShareDialog(env, name, conf); + } + ); + }); + + void needKvShareDialog() {} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/dialogs/lhd-dec.cpp b/Genie/Genie/src/qualla/dialogs/lhd-dec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..95c67c2702e92a26c9514987019153a8530cff82 --- /dev/null +++ b/Genie/Genie/src/qualla/dialogs/lhd-dec.cpp @@ -0,0 +1,481 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env->logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env->logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env->logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env->logger().post(Logger::KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env->logger().post(Logger::DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) 
\ + _env->logger().post(Logger::TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +using qc = qualla::Config; + +LhdDecDialog::LhdDecDialog(std::shared_ptr env, const std::string& name, const json& conf) + : Dialog(env, name, conf) { + + _window = qc::optional(conf, "window", 8); + _ngram = qc::optional(conf, "ngram", 3); + _gcap = qc::optional(conf, "gcap", 8); + + _lhd_mode_str = qc::optional(conf, "lhd-update-mode", "ALWAYS_FWD_ONE"); +} + +bool LhdDecDialog::process(std::vector& tokens, Dialog::Callback callback) { + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + // Vector for storing logits. + // Allocated & filled by the engine. + std::vector logits; + std::vector resultTokens; + + State::clear(); + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + using FF = Engine::Feature::Flags; + if (engine.supports(FF::DYNAMIC_LOAD)) engine.load(); + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(tokens, logits, false)) + return Dialog::abort("engine prompt processing failed", callback); + + _n_prompt += tokens.size(); + _n_past += tokens.size(); + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + std::vector tokens_tmp(1); + tokens_tmp[0] = _last_tok = sampler.process(logits); + resultTokens.push_back(_last_tok); + + _n_generated++; + + _kpis.prompt.update(start.elapsed_usec()); + + // Log latest KPIs + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + return true; + } + + // Exit condition : Prediction limit reached OR ctx size limit reached + if (!callback(_tokenizer->decode(tokens_tmp), Sentence::BEGIN)) return true; + + State::busy(true); + + // verification branch init + v_branch.resize(_gcap); + + // n-gram pools + const size_t n_vocab = _ctx->n_vocab(); + ngram_container ngrams_pool(n_vocab, _ngram, _gcap); + + // lookahead branch first level init + lhd_branch.resize(_ngram - 1); + lhd_branch_prev.resize(_window); + + for (int j = 0; j < _ngram - 1; j++) { + lhd_branch[j].resize(_window); + + for (int i = 0; i < _window; i++) { + if (j == 0) { + // initialize with the random token from prompt + lhd_branch[j][i] = tokens[1 + rand() % (tokens.size() - 1)]; + } else { + // initialize with a sequence of increasing numbers + lhd_branch[j][i] = 1000 + i; + } + } + } + + // lookahead branch other level init + while (_level_idx < _ngram - 1) { + + batch.clear(); + attention_map.clear(); + + // fill the first token of the first level + batch.push_back(_last_tok); + attention_map.push_back(-1); + lhd_branch[0][0] = _last_tok; + + // fill the remaining WINDOW - 1 tokens for the first level + for (int i = 1; i < _window; i++) { + batch.push_back(lhd_branch[0][i]); + attention_map.push_back(i - 1); + } + + // fill the rest of the levels + for (int j = 1; j < _ngram - 1; j++) { + for (int i = 0; i < _window; i++) { + batch.push_back(lhd_branch[j][i]); + attention_map.push_back((j - 1) * _window + i); + } + } + + // re-init tokens batch + tokens.resize(_window * (_ngram - 1)); + tokens = batch; + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + break; + } + + size_t n_tok = 
engine.process(tokens, attention_map, logits, true); + if (n_tok != tokens.size()) + return Dialog::abort("engine lookahead branch processing failed", callback); + + for (int i = 0; i < _window; i++) { + size_t sample_tmp_idx = (_level_idx - 1) * _window + i; + // sampler from logits all + std::span span_logits{logits.data(),logits.size()}; + std::span span_tmp = span_logits.subspan(sample_tmp_idx * n_vocab, n_vocab); + int32_t sampled_tmp_token = sampler.process(span_tmp); + lhd_branch[_level_idx][i] = sampled_tmp_token; + } + + _level_idx++; + } + + if (_lhd_mode_str == "FWD_MAX_HIT") + _lhd_update_mode = FWD_MAX_HIT; + else if (_lhd_mode_str == "FWD_LEVEL") + _lhd_update_mode = FWD_LEVEL; + else + _lhd_update_mode = ALWAYS_FWD_ONE; + + start.reset(); + + while (true) { + if (State::canceled()) { + callback("", Sentence::END); + break; + } + // input batch init + { + batch.clear(); + attention_map.clear(); + + // fill the first token of the first level + batch.push_back(_last_tok); + attention_map.push_back(-1); + // lhd_branch[0][0] = _last_tok; + + // fill the remaining WINDOW - 1 tokens for the first level + for (int i = 1; i < _window; i++) { + batch.push_back(lhd_branch[0][i]); + attention_map.push_back(i - 1); + } + + // fill the rest of the levels + for (int j = 1; j < _ngram - 1; j++) { + for (int i = 0; i < _window; i++) { + batch.push_back(lhd_branch[j][i]); + attention_map.push_back((j - 1) * _window + i); + } + } + + // build verification n-grams(branch) + { + const int g_cur = ngrams_pool.cnt[_last_tok]; + + v_branch.resize(g_cur); + // input_token_batch.size = (_window + g_cur) * (_ngram - 1); + tokens.resize((_window + g_cur) * (_ngram - 1)); + for (int g = 0; g < g_cur; g++) { + v_branch[g].active = true; + v_branch[g].tokens.resize(_ngram); + v_branch[g].i_batch.resize(_ngram); + v_branch[g].seq_id = _window + 1 + g; + v_branch[g].i_batch[0] = 0; + v_branch[g].tokens[0] = _last_tok; + } + + for (int j = 0; j < _ngram - 1; j++) { + for (int g = 0; g < g_cur; g++) { + const int idx = _last_tok * (_ngram - 1) * _gcap + g * (_ngram - 1); + const int32_t t = ngrams_pool.tokens[idx + j]; + v_branch[g].tokens[j + 1] = t; + v_branch[g].i_batch[j + 1] = j + 1; + } + } + + for (int g = 0; g < g_cur; g++) { + for (int j = 0; j < _ngram - 1; j++) { + batch.push_back(v_branch[g].tokens[j + 1]); + if (j == 0) + attention_map.push_back(0); + else + attention_map.push_back(batch.size() - 2); + } + } + } + } + + // re-init tokens batch + std::vector selected(attention_map.size(), false); + tokens = batch; + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + break; + } + + size_t n_tok = engine.process(tokens, attention_map, logits, true); + if (n_tok != tokens.size()) return Dialog::abort("engine gen processing failed", callback); + + // verification branch seq-id + size_t seq_id_best = 0; + // max hit pos + size_t i_batch_best = 0; + + // Lookahead decoding and verification + for (int v = 0; v < _ngram; ++v) { + int i_batch = 0; + + if (v > 0) { + for (int g = 0; g < (int)v_branch.size(); g++) { + // record the best matched seq and pos + if (v_branch[g].active) { + i_batch = v_branch[g].i_batch[v]; + i_batch_best = i_batch; + seq_id_best = v_branch[g].seq_id; + ++_n_accept; + break; + } + } + + if (i_batch == 0) { + break; + } + } + + size_t sample_idx; + if (seq_id_best == 0) + sample_idx = 0; + else + sample_idx = _window * (_ngram - 1) + (seq_id_best - (_window + 1)) 
* (_ngram - 1) + + i_batch - 1; + + //vector selected set + selected[sample_idx] = true; + + // sampler from logits all + std::span span_logits{logits.data(),logits.size()}; + std::span sample_logit = span_logits.subspan(sample_idx * n_vocab, n_vocab); + _last_tok = sampler.process(sample_logit); + + std::vector tokens_tmp(1); + tokens_tmp[0] = _last_tok; + + resultTokens.push_back(_last_tok); + _n_generated++; + _n_past++; + + if (_ctx->is_eos(_last_tok)) break; + + if (!callback(_tokenizer->decode(tokens_tmp), Sentence::CONTINUE)) return true; + + // if verify pass, check the next sample token until verifing failed + for (int g = 0; g < (int)v_branch.size(); g++) { + // update the n-gram active status + if (v_branch[g].active) { + if (v == _ngram - 1) { + v_branch[g].active = false; + } else { + if (_last_tok != v_branch[g].tokens[v + 1]) { + v_branch[g].active = false; + } + } + } + } + + // update lookahead tokens when v=0 OR verify match + { + for (int i = 0; i < _window; i++) { + lhd_branch_prev[i] = lhd_branch[0][i]; + } + + if (v == 0) { + for (int j = 0; j < _ngram - 2; j++) { + lhd_branch[j] = lhd_branch[j + 1]; + } + + // sample from the last level + for (int i = 0; i < _window; i++) { + size_t sample_idx = (_ngram - 2) * _window + i; + std::span sample_logit = + span_logits.subspan(sample_idx * n_vocab, n_vocab); + lhd_branch[_ngram - 2][i] = sampler.process(sample_logit); + } + } else { + if (_lhd_update_mode == FWD_MAX_HIT) { + // update lookahead branch by foward + for (int j = 0; j < _ngram - 1; j++) { + for (int i = 0; i < _window - v; i++) { + lhd_branch[j][i] = lhd_branch[j][i + 1]; + } + } + } else if (_lhd_update_mode == FWD_LEVEL) { + // update lookahead branch by shifting level + for (int j = 0; j < _ngram - 2; j++) { + lhd_branch[j] = lhd_branch[j + 1]; + } + + for (int i = 0; i < _window; i++) { + // init from the previous level + lhd_branch[_ngram - 2][i] = lhd_branch[0][i]; + } + } + } + } + + // update n-grams pool + // only update n-grams pools when v=0 + if (v == 0) { + std::vector ngram(_ngram - 1); + // n-gram pool generation + for (int f = 0; f < _window; ++f) { + const int ft = lhd_branch_prev[f]; // first token of the n-gram + + for (int j = 0; j < _ngram - 1; ++j) { + ngram[j] = lhd_branch[j][f]; + } + + // filter-out repeating n-grams + { + bool is_unique = true; + + for (int k = 0; k < ngrams_pool.cnt[ft]; ++k) { + // caculate the related idx by the first n-gram token + const int idx = ft * (_ngram - 1) * _gcap + k * (_ngram - 1); + + bool is_match = true; + for (int j = 0; j < _ngram - 1; ++j) { + if (ngrams_pool.tokens[idx + j] != ngram[j]) { + is_match = false; + break; + } + } + + // if n-gram match all, discard one of them + if (is_match) { + is_unique = false; + break; + } + } + + if (!is_unique) { + continue; + } + } + + const int head = ngrams_pool.head[ft]; + const int idx = ft * (_ngram - 1) * _gcap + head * (_ngram - 1); + + for (int i = 0; i < _ngram - 1; i++) { + // update the n-gram pool with new n-gram + ngrams_pool.tokens[idx + i] = ngram[i]; + } + + ngrams_pool.cnt[ft] = std::min(_gcap, ngrams_pool.cnt[ft] + 1); + ngrams_pool.head[ft] = (head + 1) % _gcap; + + ngrams_pool.n_total++; + } + } + } + + if (_lhd_update_mode == FWD_MAX_HIT) { + // std::random_device rd; + // std::mt19937 gen(rd()); + // std::uniform_int_distribution<> dis(0, resultTokens.size() - 1); + + // fill lookahead branch + for (int i = 0; i < _ngram - 1; i++) { + for (int j = _window - i_batch_best; j < _window; j++) { + lhd_branch[i][j] = resultTokens[1 + rand() % 
(resultTokens.size() - 1)]; + // lhd_branch[i][j] = resultTokens[dis(gen)]; + // std::cout << "Fill token = " << lhd_branch[i][j] << std::endl; + } + } + } + + // KV cache management + if (!engine.updateKV(_n_past, selected)) + return Dialog::abort("context size exceeded", callback); + + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + break; + } + } + + State::busy(false); + + _kpis.generate.update(start.elapsed_usec()); + + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + std::cout << std::endl << std::endl << std::flush; + __DEBUG("lhd-dec: n_generated = {} ---------- n_accept = {}", _n_generated, _n_accept); + + return !State::failed(); +} + +// Registrator instance +static OnLoad regy([]() { + Dialog::__register( + "lhd-dec", + [](std::shared_ptr env, const std::string& name, const json& conf) { + return (Dialog*)new LhdDecDialog(env, name, conf); + } + ); +}); + +void needLadeDialog() {} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/dialogs/multistream.cpp b/Genie/Genie/src/qualla/dialogs/multistream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b9cf614b1e6ae6b63382ad6f60e1d0e1b299c5ec --- /dev/null +++ b/Genie/Genie/src/qualla/dialogs/multistream.cpp @@ -0,0 +1,300 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env->logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env->logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env->logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env->logger().post(Logger::KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env->logger().post(Logger::DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) 
\ + _env->logger().post(Logger::TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +bool MultiStreamDialog::processFollowOnGeneration(std::vector>& streams, std::vector& logits, Dialog::Callback callback) { + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + std::vector> attention_mask(_n_streams); + std::vector streamIndices; + + if (streams.size() == 0) { + callback("\n", Sentence::END); + return true; + } + + for (int i = 0; i < streams.size(); i++) { + // Initialize all attention_masks to attend to all previous tokens + attention_mask[i].resize(_n_past, 1); + streamIndices.push_back(i); + } + + State::busy(true); + + while (true) { + if (State::canceled()) break; + + // If this exceeds context length, truncate all streams and return + if (_n_past + streamIndices.size() > _ctx->size()) { + for (auto stream : streamIndices) + callback(_tokenizer->decode(streams[stream]) + "\n", Sentence::CONTINUE); + break; + } + + // Accumulate input tokens from all streams + std::vector multi_tokens(streamIndices.size()); + + for (int i = 0; i < streamIndices.size(); i++) { + multi_tokens[i] = streams[streamIndices[i]].back(); + + // Also add current iteration to the attention_mask + for (auto _mask_row : streamIndices) + // Set to true iff on diagonal, i.e. attend to itself + attention_mask[streamIndices[i]].push_back((streamIndices[i] == _mask_row) ? 1 : 0); + } + + // Concatenate attention_mask for all active streams + std::vector multi_attn_mask; + multi_attn_mask.reserve((_n_past + streamIndices.size()) * streamIndices.size()); + for (auto i : streamIndices) + multi_attn_mask.insert( + multi_attn_mask.end(), + attention_mask[i].begin(), + attention_mask[i].end() + ); + + // __DEBUG("Multi attention mask = {}", multi_attn_mask); + + if (m_inputType == InputType::TOKENS) { + // Process input tokens for all streams in one batch + if (!engine.process(multi_tokens, multi_attn_mask, logits, true)) + return Dialog::abort("engine gen processing failed", callback); + } else if (m_inputType == InputType::EMBEDDINGS) { + // Accumulate input embeddings from all streams + auto embedBufSize = engine.getEmbeddingBufferSize(); + std::vector multi_embeddings; + + for (auto token : multi_tokens) { + // Convert tokens to embedding for the processing in the engine. 
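+ // Each active stream contributes one embedBufSize-sized row; rows are
+ // concatenated in stream order so the batch layout matches multi_tokens.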
+ std::vector curTokenEmbedding(embedBufSize, 0); + m_t2eCallback(token, curTokenEmbedding.data(), embedBufSize); + multi_embeddings.insert(multi_embeddings.end(), curTokenEmbedding.begin(), curTokenEmbedding.end()); + } + + // Process input tokens for all streams in one batch + if (!engine.process(multi_embeddings, multi_attn_mask, logits, true)) + return Dialog::abort("engine gen processing failed", callback); + } + + // Process all logits independently + std::span logit_span = std::span{logits.data(),logits.size()}; + for (int i = 0; i < streamIndices.size(); i++) { + _last_tok = sampler.process(logit_span.subspan(i * _vocab, _vocab)); + streams[streamIndices[i]].push_back(_last_tok); + } + + _n_past += streamIndices.size(); + _n_generated += streamIndices.size(); + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + for (auto it = streamIndices.begin(); it != streamIndices.end();) { + int32_t stream = *it; + if (_ctx->is_eos(streams[stream].back())) { + callback(_tokenizer->decode(streams[stream]) + "\n", Sentence::CONTINUE); + it = streamIndices.erase(it); + } else { + ++it; + } + } + + if (streamIndices.size() == 0) break; + } + callback("\n", Sentence::END); + + State::busy(false); + + return true; +} + +bool MultiStreamDialog::process(std::vector& tokens, Dialog::Callback callback) { + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + if(m_inputType != InputType::TOKENS) { + __ERROR("Input type for model is not tokens."); + return false; + } + + // Vector for storing logits. + // Allocated & filled by the engine. + std::vector logits; + + State::clear(); + + auto& engine = *_engine["primary"]; + + using FF = Engine::Feature::Flags; + if (engine.supports(FF::DYNAMIC_LOAD)) engine.load(); + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(tokens, logits, false)) + return Dialog::abort("engine prompt processing failed", callback); + + _n_prompt += tokens.size(); + _n_past += tokens.size(); + + _prompt_len = _n_past; + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + std::vector> streams; + getTopK(logits, streams, _n_streams, _p_threshold, callback); + + _n_generated += streams.size(); + _kpis.prompt.update(start.elapsed_usec()); + + // Log latest KPIs + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + start.reset(); + + bool status = processFollowOnGeneration(streams, logits, callback); + + _kpis.generate.update(start.elapsed_usec()); + + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + return status; +} + +bool MultiStreamDialog::process( + std::vector& embedding_vectors, + T2ECallback t2eCallback, + Dialog::Callback callback +) { + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + if(m_inputType != InputType::EMBEDDINGS) { + __ERROR("Input type for model is not embeddings."); + return false; + } + + // Vector for storing logits. + // Allocated & filled by the engine. + std::vector logits; + + State::clear(); + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + // Store the t2e callback for reference during follow-on generation. 
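+ // Follow-on generation must map every sampled token back to an embedding,
+ // so the callback has to remain valid beyond this call.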
+ m_t2eCallback = t2eCallback; + + size_t embedBufSize = engine.getEmbeddingBufferSize(); + + { + std::vector eosEmbedding(embedBufSize, 0.0); + if (m_t2eCallback) { + m_t2eCallback(_ctx->eos(), eosEmbedding.data(), embedBufSize); + } + // For non-autogenerative usecases (where t2eCallback is not supplied), + // the EOS vector is all zero. This is fine for models with proper + // attention masking support, but may degrade accuracy otherwise. + if (!engine.cacheEosEmbedding(eosEmbedding)) { + __DEBUG("Failed to set the eos token embedding."); + return false; + } + } + + using FF = Engine::Feature::Flags; + if (engine.supports(FF::DYNAMIC_LOAD)) engine.load(); + + size_t curTokenCount = embedding_vectors.size() / embedBufSize; + if (_n_past + curTokenCount > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, curTokenCount, _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(embedding_vectors, {}, logits)) + return Dialog::abort("engine prompt processing failed", callback); + + _n_prompt += curTokenCount; + _n_past += curTokenCount; + + _prompt_len = _n_past; + + if (!engine.updateKV(_n_past)) return Dialog::abort("context size exceeded", callback); + + std::vector> streams; + getTopK(logits, streams, _n_streams, _p_threshold, callback); + + _n_generated += streams.size(); + _kpis.prompt.update(start.elapsed_usec()); + + // Log latest KPIs + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + start.reset(); + + bool status = processFollowOnGeneration(streams, logits, callback); + + _kpis.generate.update(start.elapsed_usec()); + + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + + return status; +} + +// Registrator instance +static OnLoad regy([]() { + Dialog::__register( + "multistream", + [](std::shared_ptr env, const std::string& name, const json& conf) { + return (Dialog*)new MultiStreamDialog(env, name, conf); + } + ); +}); + +void needMultistreamDialog() {} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/dialogs/spec-dec.cpp b/Genie/Genie/src/qualla/dialogs/spec-dec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b5e4e5023f4ff657e413c8af04858fd1e3701b02 --- /dev/null +++ b/Genie/Genie/src/qualla/dialogs/spec-dec.cpp @@ -0,0 +1,458 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All rights reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env->logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env->logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env->logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env->logger().post(Logger::KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env->logger().post(Logger::DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) 
\ + _env->logger().post(Logger::TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +using qc = qualla::Config; + +class SpecDecDialog : public Dialog { + public: + SpecDecDialog(std::shared_ptr env, const std::string& name, const json& conf); + + virtual bool process(std::vector& tokens, Dialog::Callback callback) override; + + virtual bool process(std::vector& tokens, DialogCallback callback) override { + return false; + } + + private: + int32_t _draft_len; // Number of draft tokens + bool _parallel; // Enable parallel processing (where possible) + + Sampler& _d_sampler; // Draft sampler + Sampler& _t_sampler; // Target sampler + + // Token acceptor, called for each accepted token. + // Returns true to continue, false to stop + using Acceptor = std::function; + + // Rejection sampling. + // Returns number of accepted tokens + size_t rejectionSampling( + std::span tokens, + std::span target_logits, + std::span draft_probs, + Acceptor accept + ); + + int32_t sampleFromModifiedDist(std::span src0_dst, std::span src1); +}; + +SpecDecDialog::SpecDecDialog(std::shared_ptr env, const std::string& name, const json& conf) + : Dialog(env, name, conf), + _d_sampler(_sampler.contains("draft") ? *_sampler["draft"] : *_sampler["target"]), + _t_sampler(*_sampler["target"]) { + + _draft_len = qc::optional(conf, "draft-len", 3); + _parallel = qc::optional(conf, "parallel", false); + + // Check all underlying components for correct types an config + // If something is not right we set our error state that can be checked later + + if (!_sampler.contains("target")) { + State::fatal("\"target\" sampler not present in config!"); + return; + } + + if (!_engine.contains("target")) { + State::fatal("\"target\" engine not present in config!"); + return; + } + if (!_engine.contains("draft")) { + State::fatal("\"draft\" engine not present in config!"); + return; + } +} + +int32_t SpecDecDialog::sampleFromModifiedDist(std::span src0_dst, std::span src1) { + // [max(prob_target[x] - prob_draft[x], 0.f) for all x in vocab] + size_t size = src0_dst.size(); + + if (_t_sampler.gumbel()) { + // Avoid going in the denormal zone. + float tiny = 1.1754943508222875e-38; + +#pragma clang loop vectorize(enable) unroll_count(4) + for (size_t i = 0U; i < size; i++) { + float p_src0 = std::exp(src0_dst[i]); + float p_src1 = std::exp(src1[i]); + src0_dst[i] = std::log(std::max(tiny, p_src0 - p_src1)); + } + + // NOTE: The output logps_target is unnormalized since we use Gumbel trick. + // If we use standard multinomial sampling, normalization should be added. + + } else { + float sum = 0.0; // Unlikely to overflow (?) 
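+ // Standard speculative-decoding correction: build the residual
+ // distribution p'(x) = max(p_target(x) - p_draft(x), 0), then renormalize
+ // it before sampling.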
+#pragma clang loop vectorize(enable) unroll_count(4) + for (size_t i = 0U; i < size; i++) { + float num = std::max(0.f, src0_dst[i] - src1[i]); + sum += num; + src0_dst[i] = num; + } + // Normalize +#pragma clang loop vectorize(enable) unroll_count(4) + for (size_t i = 0U; i < size; i++) { + src0_dst[i] /= sum; + } + } + + if (_t_sampler.greedy()) return argmax(src0_dst); + + if (_t_sampler.gumbel()) return sampleUsingGumbelMax(src0_dst, _t_sampler.rng()); + + // Skipping softmax since the probs are already normalized + return sampleFromProbs(src0_dst, _t_sampler.rng()); +} + +size_t SpecDecDialog::rejectionSampling( + std::span tokens, + std::span target_logits, + std::span draft_probs, + Acceptor accept +) { + const size_t n_vocab = _ctx->n_vocab(); + const size_t n_tok = tokens.size(); + + assert(tokens.size() == draft_probs.size() / n_vocab); + assert(target_logits.size() == draft_probs.size() + n_vocab); + + // Rejection sampling: + // For each token in the n_tok tokens sampled from the draft model: + // 1. Determine the probability of that token being accepted by the target model + // 2. Accept the token with probability = prob_target[tok] / prob_draft[tok] (clamped to [0, 1]) + // 3. If the token is rejected, resample a new token from the following distribution: + // [max(prob_target[x] - prob_draft[x], 0.f) for all x in vocab] + int32_t t_tok; + size_t n_accepted = 0; + + std::vector target_probs; + + for (int32_t i = 0; i < n_tok; i++) { + int32_t d_tok = tokens[i]; + + std::span t_span = target_logits.subspan(i * n_vocab, n_vocab); + + if (_t_sampler.greedy()) { + t_tok = _t_sampler.process(t_span); + if (t_tok != d_tok) { + // Reject + break; + } + } else { + target_probs.clear(); + t_tok = _t_sampler.process(t_span, target_probs, false); // only probs, no token + + // Acceptance threshold + double threshold; + float prob_draft = draft_probs[i * n_vocab + d_tok]; + float prob_target = target_probs[d_tok]; + + if (_t_sampler.gumbel()) { + threshold = std::exp(double(prob_target) - double(prob_draft)); + } else { + threshold = double(prob_target) / double(prob_draft); + } + + double r = sampleFromUniform(_t_sampler.rng()); + if (r > threshold) { + // Reject + break; + } + } + // Accepted! + ++n_accepted; + if (!accept(d_tok)) return n_accepted; + } + + // Sample an extra token either from the target distribution or the modified distribution + if (n_accepted == n_tok) { + t_tok = _t_sampler.process(target_logits.subspan(n_tok * n_vocab)); + } else if (!_t_sampler.greedy()) { + // Resample from modified distribution. + t_tok = sampleFromModifiedDist( + std::span{target_probs.data(),target_probs.size()}, draft_probs.subspan(n_accepted * n_vocab, n_vocab) + ); + } // for greedy, t_tok should be already valid from the loop above + + ++n_accepted; + accept(t_tok); + + return n_accepted; +} + +bool SpecDecDialog::process(std::vector& tokens, Dialog::Callback callback) { + + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + const size_t n_vocab = _ctx->n_vocab(); + + // Vector for storing logits. + // Allocated & filled by the engine. + std::vector t_logits; + std::vector d_logits; + + bool keep_generating = true; + + // A buffer for tokens to be decoded (one at a time, per the Middleware's request) + std::vector decode_buf(1, 0); + + // Decode new token. 
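+ // This lambda is the Acceptor handed to rejectionSampling(): it forwards
+ // each accepted token through the middleware callback and stops at EOS.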
+    // Return true to continue generation, and false otherwise
+    auto decode_token = [&](int32_t t) {
+        decode_buf[0] = _last_tok = t;
+
+        if (_ctx->is_eos(t)) {
+            keep_generating = false;
+            callback("", Sentence::END);
+        } else {
+            keep_generating = callback(_tokenizer->decode(decode_buf), Sentence::CONTINUE);
+        }
+
+        return keep_generating;
+    };
+
+    State::clear();
+
+    auto& t_engine = *_engine["target"];
+    auto& d_engine = *_engine["draft"];
+
+    if (_n_past + tokens.size() > _ctx->size()) {
+        __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size());
+        callback("", Sentence::END);
+        return true;
+    }
+
+    // Step 0: Process the prompt on both the target and draft models.
+    bool d_pmpt, t_pmpt;
+    if (_parallel) {
+        std::thread dt([&]() { d_pmpt = d_engine.process(tokens, d_logits, false); });
+        std::thread tt([&]() { t_pmpt = t_engine.process(tokens, t_logits, false); });
+        dt.join();
+        tt.join();
+    } else {
+        d_pmpt = d_engine.process(tokens, d_logits, false);
+        t_pmpt = t_engine.process(tokens, t_logits, false);
+    }
+
+    if (!d_pmpt) return Dialog::abort("draft engine prompt processing failed", callback);
+    if (!t_pmpt) return Dialog::abort("target engine prompt processing failed", callback);
+
+    // KV state update
+    _n_prompt += tokens.size();
+    _n_past += tokens.size();
+
+    if (!t_engine.updateKV(_n_past)) return Dialog::abort("target context size exceeded", callback);
+    if (!d_engine.updateKV(_n_past)) return Dialog::abort("draft context size exceeded", callback);
+
+    // Sample one token from the target.
+    _last_tok = _t_sampler.process(t_logits);
+
+    _kpis.prompt.update(start.elapsed_usec());
+
+    // Log latest KPIs
+    _env->logger().post(Logger::KPIS, kpis().dump(" "));
+
+    if (!decode_token(_last_tok)) return true;
+
+    // Done with the prompt, start generating
+    start.reset();
+    State::busy(true);
+
+    // Buffers for all the tokens that need to be considered for each iteration
+    std::vector<int32_t> toks_to_target(_draft_len + 1);
+    std::vector<int32_t> toks_to_draft(2);
+
+    // Buffer for all the probability distributions from the draft sampler
+    std::vector<float> d_probs(n_vocab * _draft_len);
+
+    toks_to_target.assign(1, _last_tok);
+    toks_to_draft.assign(1, _last_tok);
+
+    // For keeping track of the number of tokens that were accepted in each iteration.
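+    // Index k counts the iterations in which exactly k + 1 tokens were accepted.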
+    std::vector<size_t> n_accepted_counts(_draft_len + 1, 0);
+
+    // Draft n_past, either in sync with n_past or one token behind (accepted-all)
+    size_t d_n_past = _n_past;
+
+    while (!State::canceled() && keep_generating) {
+        // Step 1: Use the draft model to decode draft_len (aka gamma) tokens, and accumulate probabilities
+        d_probs.clear();
+
+        for (int32_t i = 0; i < _draft_len; i++) {
+            if (d_n_past + toks_to_draft.size() > _ctx->size()) {
+                __WARN("Context limit exceeded ({} + {} > {})",
+                       d_n_past,
+                       toks_to_draft.size(),
+                       _ctx->size());
+                _kpis.generate.update(start.elapsed_usec());
+
+                // Log latest KPIs in a single line
+                _env->logger().post(Logger::KPIS, kpis().dump(" "));
+                callback("", Sentence::END);
+                return true;
+            }
+
+            if (!d_engine.process(toks_to_draft, d_logits))
+                return Dialog::abort("draft engine gen processing failed", callback);
+
+            d_n_past += toks_to_draft.size();
+
+            if (!d_engine.updateKV(d_n_past))
+                return Dialog::abort("draft context size exceeded", callback);
+
+            int32_t token = _d_sampler.process(d_logits, d_probs);
+            toks_to_draft.assign(1, token);
+            toks_to_target.push_back(token);
+
+            if (_ctx->is_eos(token)) break;
+        }
+
+        // Step 2: run the target model on the draft tokens
+        if (_n_past + toks_to_target.size() > _ctx->size()) {
+            __WARN("Context limit exceeded ({} + {} > {})",
+                   _n_past,
+                   toks_to_target.size(),
+                   _ctx->size());
+            callback("", Sentence::END);
+            _kpis.generate.update(start.elapsed_usec());
+
+            // Log latest KPIs in a single line
+            _env->logger().post(Logger::KPIS, kpis().dump(" "));
+            return true;
+        }
+
+        std::vector<int32_t> attention_map(toks_to_target.size());
+        std::iota(attention_map.begin(), attention_map.end(), -1);
+        size_t n_tok_t =
+            t_engine.process(toks_to_target, attention_map, t_logits, true /* all logits */);
+        if (n_tok_t != toks_to_target.size())
+            return Dialog::abort("target engine gen processing failed", callback);
+
+        // Step 3: accept or reject draft tokens
+        size_t n_accepted = rejectionSampling(
+            std::span{toks_to_target.data(), toks_to_target.size()}.subspan(1),
+            std::span{t_logits.data(), t_logits.size()},
+            std::span{d_probs.data(), d_probs.size()},
+            decode_token
+        );
+
+        _n_generated += n_accepted;
+        _n_past += n_accepted;
+
+        // Update stats
+        n_accepted_counts[n_accepted - 1]++;
+
+        // Accepted all?
+        if (n_accepted == _draft_len + 1) {
+            // Grab the last 2 tokens
+            toks_to_draft.assign({toks_to_target[_draft_len], _last_tok});
+            d_n_past = _n_past - 1;
+        } else {
+            // Grab only the last token
+            toks_to_draft.assign(1, _last_tok);
+            d_n_past = _n_past;
+        }
+
+        toks_to_target.assign(1, _last_tok);
+
+        __DEBUG("spec-dec: draft_len {} n_generated {} n_accepted {} n_past {}",
+                _draft_len,
+                _n_generated,
+                n_accepted,
+                _n_past);
+
+        std::vector<bool> selected(attention_map.size(), false);
+        selected[0] = true;  // the first token is always selected
+        auto last_sel = 0;
+        for (int i = n_accepted - 1; i != 0; i = attention_map[i]) {
+            selected[i] = true;
+            last_sel = i > last_sel ?
i : last_sel; + } + selected.resize(last_sel + 1); // trim away rejected tokens + + // Step 4: commit accepted tokens to kv-caches + if (!t_engine.updateKV(_n_past, selected)) + return Dialog::abort("target context size exceeded", callback); + if (!d_engine.updateKV(d_n_past)) + return Dialog::abort("draft context size exceeded", callback); + } + + if (d_n_past != _n_past) { + // The draft engine needs to process one last token to catch up + toks_to_draft.resize(1); + if (!d_engine.process(toks_to_draft)) + return Dialog::abort("draft engine gen processing failed", callback); + if (!d_engine.updateKV(_n_past)) + return Dialog::abort("draft context size exceeded", callback); + } + + State::busy(false); + + _kpis.generate.update(start.elapsed_usec()); + + // Log latest KPIs in a single line + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + __KPIS("spec-dec: accepted counts: {}", n_accepted_counts); + + return true; +} + +// Registrator instance +static OnLoad regy([]() { + Dialog::__register( + "spec-dec", + [](std::shared_ptr env, const std::string& name, const json& conf) { + return (Dialog*)new SpecDecDialog(env, name, conf); + } + ); +}); + +// Register spec-dec sampler for compatibility +static OnLoad sampler_regy([]() { + Sampler::__register("spec-dec", [](Context& ctx, const json& conf) { + return (Sampler*)new BasicSampler(ctx, conf); + }); +}); + +void needSpdDialog() {} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/dialogs/ssd-q1.cpp b/Genie/Genie/src/qualla/dialogs/ssd-q1.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e0048f8125451ad27f546e9f0f4210f79e09468 --- /dev/null +++ b/Genie/Genie/src/qualla/dialogs/ssd-q1.cpp @@ -0,0 +1,1046 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All rights reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env->logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env->logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env->logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env->logger().post(Logger::KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env->logger().post(Logger::DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) 
\
+    _env->logger().post(Logger::TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); })
+
+namespace qualla {
+
+using qc = qualla::Config;
+using Logits = std::span<float>;
+
+class SelfSpecDecDialog : public Dialog {
+    enum { VERSION = 1 };
+
+  public:
+    SelfSpecDecDialog(std::shared_ptr<Env> env, const std::string& name, const json& conf);
+
+    virtual bool process(std::vector<int32_t>& tokens, Dialog::Callback callback) override;
+    virtual bool process(std::vector<float>& embedding_vectors, Dialog::T2ECallback t2eCallback, Dialog::Callback callback) override;
+    virtual void reset() override;
+
+    virtual bool process(std::vector<int32_t>& tokens, DialogCallback callback) override {
+        return false;
+    }
+
+    virtual bool save(const std::string& name) override;
+    virtual bool restore(const std::string& name) override;
+
+  private:
+    Sampler& _t_sampler;
+
+    int32_t _vocab;
+
+    std::string _kv_prefix_name{"forecast-prefix"};
+
+    // AR8
+    size_t _draft{1};
+    std::vector<int32_t> _branches{3};
+
+    size_t _forecast_prefix{16};
+    size_t _forecast_token_offset{32000};
+    size_t _forecast_token_count{4};
+
+    // Multistream parameters
+    int32_t _n_streams;
+    float _p_threshold;
+
+    InputType m_inputType{InputType::UNKNOWN};
+
+    bool processFollowOnGeneration(std::vector<int32_t>& tokens, std::vector<float>& logits, Dialog::Callback callback);
+    // Multistream
+    bool processFollowOnGeneration(std::vector<std::vector<int32_t>>& streams, std::vector<float>& logits, Dialog::Callback callback);
+
+    /*
+       Helper function for combining masks for SSD multistream.
+
+       @param mask The attention mask to be tiled.
+       @param streamIndices Indices of streams. The tiling count is equal to the size of this vector.
+       @param pastMap A vector of stream indices for masking all past tokens after the prompt.
+       @param prefixOffset Offset where KV prefix masking begins in each tile.
+       @param finalMask A mask that combines all of the independent masks such that
+                        they can be executed in the same inference.
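+
+       For example, with two streams each per-stream mask lands on the block
+       diagonal of finalMask; in addition, every row attends to the KV prefix
+       (where permitted), the shared prompt, and the past tokens of its own
+       stream.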
+ */ + void tileAttentionMask(const std::vector& mask, const std::vector streamIndices, const std::vector& pastMap, const size_t prefixOffset, std::vector& finalMask); + + std::vector gen_attention_map() const; + auto get_len_flat_sample_tree() const; + auto gen_forecast_tokens(int repeat) const; + + // Sampling and verification + std::vector build_sample_tree( + int32_t last_token, + Logits logits, + const std::vector& indices + ); + std::tuple, std::vector> verify_and_select_longest( + std::span sample_tree, + Logits logits + ); + std::vector sample_to_draft(Logits logits, size_t index, size_t count) { + const auto thislogit = logits.subspan(index * _vocab, _vocab); + IndexedLogits logit(thislogit, _t_sampler.rng()); + logit.topK(count); + return logit.indices; + } + int32_t sample_to_verify(Logits logits, size_t index) { + const auto thislogit = logits.subspan(index * _vocab, _vocab); + if (_t_sampler.greedy()) { + return argmax(thislogit); + } + auto token = _t_sampler.process(thislogit); + return token; + } +}; + +SelfSpecDecDialog::SelfSpecDecDialog( + std::shared_ptr env, + const std::string& name, + const json& conf +) + : Dialog(env, name, conf), _t_sampler(*_sampler["primary"]) { + + auto ssd_version = qc::optional(conf, "ssd-version", 0); + if (ssd_version > SelfSpecDecDialog::VERSION) __WARN("newer ssd-version in config!"); + + _vocab = _ctx->n_vocab(); + + _branches = qc::optional(conf, "branches", _branches); + _draft = _branches.size(); + + _forecast_prefix = qc::optional(conf, "forecast-prefix", _forecast_prefix); + _forecast_token_count = qc::optional(conf, "forecast-token-count", _forecast_token_count); + _forecast_token_offset = _vocab; + + _kv_prefix_name = qc::optional(conf, "forecast-prefix-name", _kv_prefix_name); + + _n_streams = qc::optional(conf, "n-streams", 1); + _p_threshold = qc::optional(conf, "p-threshold", 0.0); + + if (!_engine.contains("primary")) { + State::fatal("\"primary\" engine not present in config!"); + return; + } + + //Get Input Type from the engine + m_inputType = _engine["primary"]->getInputType(); + // Load KV prefix + Timer timer; + size_t n_restored_prefix = _engine["primary"]->restore(_kv_prefix_name); + if (n_restored_prefix != _forecast_prefix) { + // clang-format off + throw std::runtime_error( fmt::format( "SSD : Loaded {} KV$ from {} but expected {} KV$", + n_restored_prefix, _kv_prefix_name, _forecast_prefix ) ); + // clang-format on + } + _n_past = _forecast_prefix; + _kpis.restore.update(timer.elapsed_usec()); +} + +auto SelfSpecDecDialog::get_len_flat_sample_tree() const { + size_t len_flat_sample_tree = 1; + size_t last_tokens = 1; + for (int i = 0; i < _draft; ++i) { + len_flat_sample_tree += last_tokens * _branches[i]; + last_tokens = last_tokens * _branches[i]; + } + return len_flat_sample_tree; +} + +auto SelfSpecDecDialog::gen_forecast_tokens(int repeat) const { + std::vector forecast_tokens(_draft, 0); + std::iota(forecast_tokens.begin(), forecast_tokens.end(), _forecast_token_offset); + + std::vector ret; + for (auto i = 0; i < repeat; ++i) + ret.insert(ret.end(), forecast_tokens.begin(), forecast_tokens.end()); + return ret; +} + +std::vector SelfSpecDecDialog::gen_attention_map() const { + auto len_flat_sample_tree = get_len_flat_sample_tree(); + std::vector attention_map(len_flat_sample_tree + len_flat_sample_tree * _draft, -1); + + auto build_verify_tree = [&attention_map, + this](auto self, int parent_begin, int parent_end, int level) { + if (level == _draft) return; + auto current = parent_end; + for (auto parent = 
parent_begin; parent < parent_end; parent += 1) { + for (auto child = current; child < current + _branches[level]; child += 1) + attention_map[child] = parent; + current += _branches[level]; + } + self(self, parent_end, current, level + 1); + }; + + auto build_forecast_tree = [&attention_map, this](int parent_begin, int parent_end) { + auto current = parent_end; + for (auto parent = parent_begin; parent < parent_end; parent += 1) { + for (auto child = current, current_parent = parent; child < current + _draft; + child += 1) { + attention_map[child] = current_parent; + current_parent = child; + } + current += _draft; + } + }; + + build_verify_tree(build_verify_tree, 0, 1, 0); + build_forecast_tree(0, len_flat_sample_tree); + return attention_map; +} + +std::vector SelfSpecDecDialog::build_sample_tree( + int32_t last_token, + Logits logits, + const std::vector& indices +) { + std::vector tree = {last_token}; + for (auto draft = 0, repeat = 1; draft < _draft; ++draft) { + auto samples = sample_to_draft(logits, indices[draft], _branches[draft]); + for (auto i = 0; i < repeat; ++i) { + tree.insert(tree.end(), samples.begin(), samples.end()); + } + repeat *= _branches[draft]; + } + return tree; +} + +std::tuple, std::vector> SelfSpecDecDialog::verify_and_select_longest( + std::span sample_tree, + Logits logits +) { + std::vector> accepted_all = {{sample_to_verify(logits, 0)}}; + std::vector> node_ids_all = {{0}}; + + std::vector draft_offset(_draft, 0); + draft_offset[0] = 1; + for (int32_t i = 1, draft_count = _branches[0]; i < _draft; ++i) { + draft_offset[i] = draft_offset[i - 1] + draft_count; + draft_count = draft_count * _branches[i]; + } + + size_t longest = 0, longest_size = 1; + auto verify_recursive = [&](auto self, + std::vector accepted, + std::vector node_ids, + int draft, + int offset_in_draft) -> void { + auto target = accepted.back(); + auto branch_base = draft_offset[draft] + offset_in_draft; + for (auto branch = 0; branch < _branches[draft]; ++branch) { + auto ndx_node = branch_base + branch; + if (!_ctx->is_eos(target) && target == sample_tree[ndx_node]) { + auto sample_accepted = sample_to_verify(logits, ndx_node); + accepted_all.push_back(accepted); + accepted_all.back().push_back(sample_accepted); + node_ids_all.push_back(node_ids); + node_ids_all.back().push_back(ndx_node); + if (node_ids_all.back().size() > longest_size) { + longest = node_ids_all.size() - 1; + longest_size = node_ids_all.back().size(); + } + if (draft + 1 < _draft) + self(self, + accepted_all.back(), + node_ids_all.back(), + draft + 1, + (offset_in_draft + branch) * _branches[draft + 1]); + } + } + }; + verify_recursive(verify_recursive, accepted_all.back(), node_ids_all.back(), 0, 0); + return {accepted_all[longest], node_ids_all[longest]}; +} + +void SelfSpecDecDialog::tileAttentionMask(const std::vector& mask, const std::vector streamIndices, const std::vector& pastMap, const size_t prefixOffset, std::vector& tiledMask) { + + const size_t sampleTreeLen = get_len_flat_sample_tree(); + const size_t pastMapLen = pastMap.size(); + const int posVal = 1, negVal = 0; + + const size_t maskSize = mask.size(); + const size_t numTokens = maskSize * streamIndices.size(); + + const size_t rowLength = _n_past + numTokens; + tiledMask.resize(numTokens * rowLength); + + for (int maskIdx = 0; maskIdx < streamIndices.size(); maskIdx++) { + // Number of rows to skip to reach the current tile. 
+ const size_t tileOffset = maskIdx * maskSize; + int32_t* const tileStart = &tiledMask[tileOffset*rowLength + tileOffset + _n_past]; + for (int i = 0; i < maskSize; i++) { + // Pointer to the start of row i of the current mask + int32_t* rowPtr = &tiledMask[(tileOffset + i)*rowLength]; + // Skip kv-prefix attention for rows without speculative tokens. + const int prefixFillVal = (i < prefixOffset) ? negVal : posVal; + std::fill_n(rowPtr, _forecast_prefix, prefixFillVal); + rowPtr += _forecast_prefix; + // Always attend to prompt. + std::fill_n(rowPtr, _n_prompt, posVal); + rowPtr += _n_prompt; + + // Fill in the past valid tokens for this stream. + for (const size_t& pastIdx : pastMap) { + *rowPtr = (pastIdx == streamIndices[maskIdx]) ? posVal : negVal; + rowPtr++; + } + + // Clear the rest of the row. It will mostly consist of 0's. + std::fill_n(rowPtr, rowLength - _n_prompt - _forecast_prefix - pastMapLen, negVal); + // Move to the correct tile. + rowPtr += tileOffset; + // Translate the mask. + const auto tokenId = mask[i]; + if (tokenId > -1) { + std::copy_n(tileStart + (tokenId * rowLength), tokenId + 1, rowPtr); + } + // Always attend to self. + rowPtr[i] = posVal; + } + } +} + +// Takes a vector of tokens and produces a vector of embeddings via the provided T2E callback. +static inline void convertTokensToEmbeddings(std::vector& tokens, + std::vector& embeddings, + size_t embeddingBufferSize, + Dialog::T2ECallback t2eCallback) { + for(auto &token : tokens){ + std::vector embedding(embeddingBufferSize,0); + t2eCallback(token, embedding.data(), embeddingBufferSize); + embeddings.insert(embeddings.end(), embedding.begin(), embedding.end()); + } +} + +bool SelfSpecDecDialog::processFollowOnGeneration(std::vector& tokens, std::vector& logits, Dialog::Callback callback){ + + // Handles the printing of the subsequent generated tokens + bool keep_generating = true; + const size_t context = _ctx->n_ctx(); + + std::vector decode_buf( + 1, 0 + ); // A buffer for tokens to be decoded (one at a time, per the Middleware's request) + auto decode_token = [&](int32_t t) { + if (!keep_generating) return; + // Decode new token. 
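+        // (i.e. run it through the tokenizer and forward the text to the user callback).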
+ // Return true to continue generation, and false otherwise + decode_buf[0] = _last_tok = t; + ++_n_generated; + if (_ctx->is_eos(t)) { + keep_generating = false; + callback("", Sentence::END); + } else { + keep_generating = callback(_tokenizer->decode(decode_buf), Sentence::CONTINUE); + } + return; + }; + // set decode_buf from prompt processing + decode_buf[0] = _last_tok; + + auto& engine = *_engine["primary"]; + + auto update_kv = [&engine, &callback, this](size_t past, const std::vector& selected) { + if (!engine.updateKV(past, selected)) + return Dialog::abort("context size exceeded", callback); + return true; + }; + + + // prepare the next inference + std::vector indices(_draft, 0); + std::iota(indices.begin(), indices.end(), 1); + tokens = build_sample_tree(sample_to_verify(std::span{logits.data(),logits.size()}, 0), std::span{logits.data(),logits.size()}, indices); + decode_token(tokens[0]); + + // Prepare constant options for next inferences + const auto len_flat_sample_tree = get_len_flat_sample_tree(); + const auto forecast_tokens = gen_forecast_tokens(len_flat_sample_tree); + const auto attention_map = gen_attention_map(); + + engine.set({{"kv-prefix-offset", len_flat_sample_tree}}); + + std::vector accepted_counts(_draft + 1, 0); + std::vector selected(attention_map.size(), false); + + while (!State::canceled() && keep_generating) { + + // Append forecast tokens + tokens.insert(tokens.end(), forecast_tokens.begin(), forecast_tokens.end()); + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + break; + } + + size_t n_tok_t = 0; + + // Bifurcate based on embedding as input or token as input + if (m_inputType == InputType::TOKENS) + n_tok_t = engine.process(tokens, attention_map, logits, true /* all logits */); + else if (m_inputType == InputType::EMBEDDINGS) { + // Convert tokens to embedding for the processing in the engine. 
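+            // Each token is mapped to a dense vector through the user-supplied T2E callback.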
+ auto embedBufSize = engine.getEmbeddingBufferSize(); + std::vector embedding; + for(auto &token: tokens){ + std::vector curTokenEmbedding(embedBufSize,0); + m_t2eCallback(token, curTokenEmbedding.data(), embedBufSize); + embedding.insert(embedding.end(), curTokenEmbedding.begin(), curTokenEmbedding.end()); + } + n_tok_t = engine.process(embedding, attention_map, logits, true /* all logits */); + } else { + return Dialog::abort("No valid Input Type is used", callback); + } + if (n_tok_t != tokens.size()) return Dialog::abort("engine processing failed", callback); + + // Accept tokens + auto [accepted_tokens, accepted_ids] = verify_and_select_longest(std::span{tokens.data(),tokens.size()}, + std::span{logits.data(),logits.size()}); + + // Commit accepted tokens to kv-caches + selected.resize(accepted_ids.back() + 1); // trim away rejected tokens + std::fill(selected.begin(), selected.end(), false); + for (auto id : accepted_ids) + selected[id] = true; + accepted_counts[accepted_tokens.size() - 1] += 1; + _n_past += accepted_tokens.size(); + update_kv(_n_past, selected); + + // Decode tokens + std::for_each(accepted_tokens.begin(), accepted_tokens.end(), decode_token); + + // Prepare new tokens + auto next_draft_offset = len_flat_sample_tree + accepted_ids.back() * _draft; + std::iota(indices.begin(), indices.end(), next_draft_offset); + tokens = build_sample_tree(accepted_tokens.back(), std::span{logits.data(),logits.size()}, indices); + } + + State::busy(false); + + auto total_iteration = std::accumulate(accepted_counts.begin(), accepted_counts.end(), 0); + auto accept_rate = + float(_n_generated - 1) / total_iteration; // -1: exclude first generated token + __KPIS("SSD{{draft:{}, branch:{}, greedy:{}}}: accepted counts: {}, accept rate = {} tokens/iteration", + _draft, + _branches, + _t_sampler.greedy(), + accepted_counts, + accept_rate); + + return true; +} + +// Multistream AR generation +bool SelfSpecDecDialog::processFollowOnGeneration(std::vector>& streams, std::vector& logits, Dialog::Callback callback) { + + auto& sampler = *_sampler["primary"]; + auto& engine = *_engine["primary"]; + + auto update_kv = [&engine, &callback, this](size_t past, const std::vector& selected) { + if (!engine.updateKV(past, selected)) + return Dialog::abort("context size exceeded", callback); + return true; + }; + + std::vector streamIndices(streams.size()); + std::vector past_map(streams.size()); + + std::iota(streamIndices.begin(), streamIndices.end(), 0); + // Since the first inference is done separately, it is + // expected that each stream already has 1 valid AR token. 
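+    // past_map therefore starts with one entry per stream, indexed 0..streams.size()-1.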
+ std::iota(past_map.begin(), past_map.end(), 0); + + bool keep_generating = true; + const size_t context = _ctx->n_ctx(); + + if (streams.size() == 0) { + callback("\n", Sentence::END); + return true; + } + + // Prepare constant options for next inferences + const auto len_flat_sample_tree = get_len_flat_sample_tree(); + const auto forecast_tokens = gen_forecast_tokens(len_flat_sample_tree); + const auto attention_map = gen_attention_map(); + + std::vector> draftStreams(streams.size()); + + for (int i = 0; i < streams.size(); i++) { + // prepare the next inference + std::vector indices(_draft, 0); + std::iota(indices.begin(), indices.end(), 1); + draftStreams[i] = build_sample_tree(sample_to_verify(std::span{logits.data(),logits.size()}, i*(1+_draft)), std::span{logits.data(),logits.size()}, indices); + streams[i].push_back(draftStreams[i][0]); + + } + + std::vector multi_attn_mask; + + std::vector accepted_counts(_draft + 1, 0); + + engine.set({{"kv-prefix-offset", len_flat_sample_tree}}); + + State::busy(true); + + while (true) { + if (State::canceled()) break; + + // If this exceeds context length, truncate all streams and return + if (_n_past + streamIndices.size() > _ctx->size()) { + for (auto stream : streamIndices) + callback(_tokenizer->decode(streams[stream]) + "\n", Sentence::CONTINUE); + break; + } + + // Accumulate input tokens from all streams + std::vector multi_tokens; + for (auto streamIdx : streamIndices) { + multi_tokens.insert(multi_tokens.end(), draftStreams[streamIdx].begin(), draftStreams[streamIdx].end()); + multi_tokens.insert(multi_tokens.end(), forecast_tokens.begin(), forecast_tokens.end()); + } + + if (_n_past + multi_tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, multi_tokens.size(), _ctx->size()); + callback("", Sentence::END); + break; + } + + tileAttentionMask(attention_map, streamIndices, past_map, len_flat_sample_tree, multi_attn_mask); + + size_t n_tok_t = 0; + + if (m_inputType == InputType::TOKENS) { + // Process input tokens for all streams in one batch + n_tok_t = engine.process(multi_tokens, multi_attn_mask, logits, true); + } else if (m_inputType == InputType::EMBEDDINGS) { + // Accumulate input embeddings from all streams + auto embedBufSize = engine.getEmbeddingBufferSize(); + std::vector multi_embeddings; + + convertTokensToEmbeddings(multi_tokens, multi_embeddings, embedBufSize, m_t2eCallback); + + // Process input tokens for all streams in one batch + n_tok_t = engine.process(multi_embeddings, multi_attn_mask, logits, true); + } + if (n_tok_t != multi_tokens.size()) return Dialog::abort("engine processing failed", callback); + + std::vector all_selected; + + // Process all logits independently + std::span logit_span = std::span{logits.data(),logits.size()}; + std::span token_span = std::span{multi_tokens.data(), multi_tokens.size()}; + for (int i = 0; i < streamIndices.size(); i++) { + const size_t streamIdx = streamIndices[i]; + std::vector& stream = streams[streamIdx]; + + const size_t tileStride = draftStreams[streamIdx].size() + forecast_tokens.size(); + + std::span tiled_logits = logit_span.subspan(i * tileStride * _vocab, _vocab); + + // Accept tokens + auto [accepted_tokens, accepted_ids] = verify_and_select_longest(token_span.subspan(i * tileStride, tileStride), + tiled_logits); + + // Commit accepted tokens to kv-caches + std::vector selected(tileStride, false); + for (auto id : accepted_ids) { + selected[id] = true; + past_map.push_back(streamIdx); + } + 
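+            // Append this stream's tile of "keep" flags; the batched KV update
+            // below commits all streams in one call.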
all_selected.insert(all_selected.end(), selected.begin(), selected.end()); + accepted_counts[accepted_tokens.size() - 1] += 1; + _n_past += accepted_tokens.size(); + + // Decode tokens + stream.insert(stream.end(), accepted_tokens.begin(), accepted_tokens.end()); + _n_generated += accepted_tokens.size(); + + // Prepare new tokens + std::vector indices(_draft, 0); + auto next_draft_offset = len_flat_sample_tree + accepted_ids.back() * _draft; + std::iota(indices.begin(), indices.end(), next_draft_offset); + draftStreams[streamIdx] = build_sample_tree(accepted_tokens.back(), tiled_logits, indices); + } + + update_kv(_n_past, all_selected); + for (auto it = streamIndices.begin(); it != streamIndices.end();) { + int32_t stream = *it; + if (_ctx->is_eos(streams[stream].back())) { + callback(_tokenizer->decode(streams[stream]) + "\n", Sentence::CONTINUE); + it = streamIndices.erase(it); + } else { + ++it; + } + } + + if (streamIndices.size() == 0) break; + } + callback("\n", Sentence::END); + + State::busy(false); + + auto total_iteration = std::accumulate(accepted_counts.begin(), accepted_counts.end(), 0); + auto accept_rate = + float(_n_generated - 1) / total_iteration; // -1: exclude first generated token + __KPIS("SSD{{draft:{}, branch:{}, greedy:{}}}: accepted counts: {}, accept rate = {} tokens/iteration", + _draft, + _branches, + _t_sampler.greedy(), + accepted_counts, + accept_rate); + + return true; +} + +// Handle prompt processing and generation will be done processFollowOnGeneration +// Pass t2e callback using setter and remove as an argument. call setter from the base query function of dialog + +bool SelfSpecDecDialog::process(std::vector& embedding, + T2ECallback t2eCallback, + Dialog::Callback callback ){ + + // Check for prev failures and bail out early + if (State::failed()) return false; + + if(m_inputType != InputType::EMBEDDINGS) { + __ERROR("Input type for model is not embeddings."); + return false; + } + + Timer start; + State::clear(); + + std::vector logits; + auto& engine = *_engine["primary"]; + + auto update_kv = [&engine, &callback, this](size_t past, const std::vector& selected) { + if (!engine.updateKV(past, selected)) + return Dialog::abort("context size exceeded", callback); + return true; + }; + + // Store the t2e callback for reference during follow-on generation. 
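+    // Follow-on decode steps must convert freshly sampled tokens back into
+    // input embeddings, so the callback is kept on the dialog.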
+    m_t2eCallback = t2eCallback;
+
+    auto embedBufSize = engine.getEmbeddingBufferSize();
+
+    {
+        std::vector<float> eosEmbedding(embedBufSize, 0.0f);
+        if (m_t2eCallback) {
+            m_t2eCallback(_ctx->eos(), eosEmbedding.data(), embedBufSize);
+        }
+        if (!engine.cacheEosEmbedding(eosEmbedding)) {
+            __DEBUG("Failed to set the eos token embedding.");
+            return false;
+        }
+    }
+
+    using FF = Engine::Feature::Flags;
+    if (engine.supports(FF::DYNAMIC_LOAD)) engine.load();
+
+    _env->logger().post(Logger::KPIS, kpis().dump(" "));
+    start.reset();
+
+    engine.set({{"kv-prefix-skip", _forecast_prefix}});
+
+    std::vector<int32_t> tokens(1, 0);
+
+    // Process prompt
+    // Get the number of tokens in the input
+    size_t curTokensCount = embedding.size() / embedBufSize;
+
+    if (curTokensCount * embedBufSize != embedding.size()) {
+        size_t expectedLength = (curTokensCount + (embedding.size() % embedBufSize != 0)) * embedBufSize;
+        __DEBUG("Input has the wrong length: expected {} but found {}.", expectedLength, embedding.size());
+        return Dialog::abort("input size is not a multiple of the embedding length", callback);
+    }
+
+    _n_prompt += curTokensCount;
+
+    std::vector<int32_t> attention_map(curTokensCount);
+    std::iota(attention_map.begin(), attention_map.end(), -1);
+
+    engine.set({{"kv-prefix-offset", curTokensCount}});  // Do not attend to the prefix
+
+    if (_n_past + curTokensCount > _ctx->size()) {
+        __WARN("Context limit exceeded ({} + {} > {})", _n_past, curTokensCount, _ctx->size());
+        callback("", Sentence::END);
+        return true;
+    }
+
+    if (!engine.process(embedding, attention_map, logits, false))
+        return Dialog::abort("engine prompt processing failed", callback);  // TODO: make this message more generic.
+    _n_past += curTokensCount;
+    update_kv(_n_past, {});
+
+    bool status = true;
+    if (_n_streams <= 1) {
+        tokens[0] = sample_to_verify(std::span{logits.data(), logits.size()}, 0);
+
+        // Decode the first token.
+ _last_tok = tokens[0]; + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + return true; + } + + if (!callback(_tokenizer->decode(tokens), Sentence::BEGIN)) return true; + //decode_token(tokens[0]); + + if (!m_t2eCallback) { + callback("", Sentence::END); + return true; + } + + // Mark TTFT + _kpis.prompt.update(start.elapsed_usec()); + start.reset(); + State::busy(true); + + // Initial inference for self-speculative decoding pipeline with forecast tokens and prefix + // process separately because logits are required for these tokens + for (int i = 0; i < _draft; ++i) + tokens.push_back(_forecast_token_offset + i); + + attention_map.resize(tokens.size()); + std::iota(attention_map.begin(), attention_map.end(), -1); + engine.set({{"kv-prefix-offset", 1}}); // Prevent the last token from attending + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + // Convert tokens to embeddings + // reset embedding vector to make space for the next runs + embedding.clear(); + convertTokensToEmbeddings(tokens, embedding, embedBufSize, m_t2eCallback); + + if (!engine.process(embedding, attention_map, logits, true)) + return Dialog::abort("initial inference for SSD pipeline failed", callback); + + _n_past += 1; + update_kv(_n_past, {}); + + // Use existing as much as possible + status = processFollowOnGeneration(tokens, logits, callback); + } else { + std::vector> streams; + getTopK(logits, streams, _n_streams, _p_threshold, callback); + + if (!m_t2eCallback) { + for (auto& stream : streams) { + if (!callback(_tokenizer->decode(stream) + "\n", Sentence::BEGIN)) return true; + } + callback("", Sentence::END); + return true; + } + + // Mark TTFT + _kpis.prompt.update(start.elapsed_usec()); + start.reset(); + State::busy(true); + + if (streams.size() == 0) { + callback("\n", Sentence::END); + return true; + } + + // Initial inference for self-speculative decoding pipeline with forecast tokens and prefix + // process separately because logits are required for these tokens + attention_map.resize(1 + _draft); + std::iota(attention_map.begin(), attention_map.end(), -1); + + std::vector stream_indices(streams.size()); + std::iota(stream_indices.begin(), stream_indices.end(), 0); + + std::vector multi_attn_mask; + std::vector past_map; + const size_t kvPrefixOffset = 1; + + tileAttentionMask(attention_map, stream_indices, past_map, kvPrefixOffset, multi_attn_mask); + + // Accumulate input tokens from all streams + std::vector multi_tokens; + + multi_tokens.reserve(streams.size() * (1 + _draft)); + for (int i = 0; i < streams.size(); i++) { + multi_tokens.insert(multi_tokens.end(), streams[i].begin(), streams[i].end()); + for (int i = 0; i < _draft; ++i) { + multi_tokens.push_back(_forecast_token_offset + i); + } + } + + // Convert tokens to embeddings + // reset embedding vector to make space for the next runs + embedding.clear(); + convertTokensToEmbeddings(multi_tokens, embedding, embedBufSize, m_t2eCallback); + + if (_n_past + multi_tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, multi_tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(embedding, multi_attn_mask, logits, true)) + return Dialog::abort("initial inference for SSD pipeline failed", callback); + + std::vector selected(multi_tokens.size(), false); + for (int i = 0; i < multi_tokens.size(); i+=(_draft+1)) { 
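+        // Keep only the first (sampled) token of each stream tile; the _draft
+        // forecast placeholders that follow it are not committed to the KV cache.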
+ selected[i] = true; + } + + _n_past += streams.size(); + update_kv(_n_past, selected); + + status = processFollowOnGeneration(streams, logits, callback); + } + + _kpis.generate.update(start.elapsed_usec()); + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + start.reset(); + + return status; +} + +bool SelfSpecDecDialog::process(std::vector& tokens, Dialog::Callback callback) { + + // Check for prev failures and bail out early + if (State::failed()) return false; + + Timer start; + + if(m_inputType != InputType::TOKENS) { + __ERROR("Input type for model is not tokens."); + return false; + } + + State::clear(); + + std::vector logits; + auto& engine = *_engine["primary"]; + + auto update_kv = [&engine, &callback, this](size_t past, const std::vector& selected) { + if (!engine.updateKV(past, selected)) + return Dialog::abort("context size exceeded", callback); + return true; + }; + + using FF = Engine::Feature::Flags; + if (engine.supports(FF::DYNAMIC_LOAD)) engine.load(); + + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + start.reset(); + + engine.set({{"kv-prefix-skip", _forecast_prefix}}); + + std::vector attention_map(tokens.size()); + std::iota(attention_map.begin(), attention_map.end(), -1); + + // Process prompt + _n_prompt += tokens.size(); + engine.set({{"kv-prefix-offset", tokens.size()}}); // Do not attend prefix + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(tokens, attention_map, logits, false)) + return Dialog::abort("engine prompt processing failed", callback); + _n_past += tokens.size(); + update_kv(_n_past, {}); + + bool status = true; + if (_n_streams <= 1) { + tokens[0] = sample_to_verify(std::span{logits.data(),logits.size()}, 0); + tokens.resize(1); + + // Decode the first token. 
+ _last_tok = tokens[0]; + if (_ctx->is_eos(_last_tok)) { + callback("", Sentence::END); + return true; + } + + if (!callback(_tokenizer->decode(tokens), Sentence::BEGIN)) return true; + // decode_token(tokens[0]); + + // Mark TTFT + _kpis.prompt.update(start.elapsed_usec()); + start.reset(); + State::busy(true); + + // Initial inference for self-speculative decoding pipeline with forecast tokens and prefix + // process separately because logits are required for these tokens + for (int i = 0; i < _draft; ++i) + tokens.push_back(_forecast_token_offset + i); + + attention_map.resize(tokens.size()); + std::iota(attention_map.begin(), attention_map.end(), -1); + engine.set({{"kv-prefix-offset", 1}}); // Prevent the last token from attending + + if (_n_past + tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(tokens, attention_map, logits, true)) + return Dialog::abort("initial inference for SSD pipeline failed", callback); + + _n_past += 1; + update_kv(_n_past, {}); + + status = processFollowOnGeneration(tokens, logits, callback); + } else { + std::vector> streams; + getTopK(logits, streams, _n_streams, _p_threshold, callback); + + // Mark TTFT + _kpis.prompt.update(start.elapsed_usec()); + start.reset(); + State::busy(true); + + if (streams.size() == 0) { + callback("\n", Sentence::END); + return true; + } + + // Initial inference for self-speculative decoding pipeline with forecast tokens and prefix + // process separately because logits are required for these tokens + attention_map.resize(1 + _draft); + std::iota(attention_map.begin(), attention_map.end(), -1); + + std::vector stream_indices(streams.size()); + std::iota(stream_indices.begin(), stream_indices.end(), 0); + + std::vector multi_attn_mask; + std::vector past_map; + const size_t kvPrefixOffset = 1; + + tileAttentionMask(attention_map, stream_indices, past_map, kvPrefixOffset, multi_attn_mask); + + // Accumulate input tokens from all streams + std::vector multi_tokens; + + multi_tokens.reserve(streams.size() * (1 + _draft)); + for (int i = 0; i < streams.size(); i++) { + multi_tokens.insert(multi_tokens.end(), streams[i].begin(), streams[i].end()); + for (int i = 0; i < _draft; ++i) { + multi_tokens.push_back(_forecast_token_offset + i); + } + } + + if (_n_past + multi_tokens.size() > _ctx->size()) { + __WARN("Context limit exceeded ({} + {} > {})", _n_past, multi_tokens.size(), _ctx->size()); + callback("", Sentence::END); + return true; + } + + if (!engine.process(multi_tokens, multi_attn_mask, logits, true)) + return Dialog::abort("initial inference for SSD pipeline failed", callback); + + std::vector selected(multi_tokens.size(), false); + for (int i = 0; i < multi_tokens.size(); i+=(_draft+1)) { + selected[i] = true; + } + + _n_past += streams.size(); + update_kv(_n_past, selected); + + status = processFollowOnGeneration(streams, logits, callback); + } + + _kpis.generate.update(start.elapsed_usec()); + _env->logger().post(Logger::KPIS, kpis().dump(" ")); + start.reset(); + + return status; +} + +void SelfSpecDecDialog::reset() { + Dialog::reset(); + _n_past = _forecast_prefix; + size_t n_restored_prefix = _engine["primary"]->restore(_kv_prefix_name); + if (n_restored_prefix != _forecast_prefix) { + // clang-format off + throw std::runtime_error( fmt::format( "SSD : Loaded {} KV$ from {} but expected {} KV$", + n_restored_prefix, _kv_prefix_name, _forecast_prefix ) ); + // clang-format 
on + } +} + +bool SelfSpecDecDialog::save(const std::string& name) { + if (_n_streams > 1) { + throw std::runtime_error("Save is unsupported for multistream dialogs."); + } + return Dialog::save(name); +} + +bool SelfSpecDecDialog::restore(const std::string& name) { + if (_n_streams > 1) { + throw std::runtime_error("Restore is unsupported for multistream dialogs."); + } + return Dialog::restore(name); +} + +// Registrator instance +static OnLoad regy([]() { + Dialog::__register( + "ssd-q1", + [](std::shared_ptr env, const std::string& name, const json& conf) { + return (Dialog*)new SelfSpecDecDialog(env, name, conf); + } + ); +}); + +// Register ssd sampler for compatibility +static OnLoad sampler_regy([]() { + Sampler::__register("basic", [](Context& ctx, const json& conf) { + return (Sampler*)new BasicSampler(ctx, conf); + }); +}); + +void needSsdDialog() {} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/embedding.cpp b/Genie/Genie/src/qualla/embedding.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9426e5715fa5050b54870222a55692c862e082ab --- /dev/null +++ b/Genie/Genie/src/qualla/embedding.cpp @@ -0,0 +1,190 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace fs = std::filesystem; + +namespace qualla { + +Embedding::Embedding(std::shared_ptr env, const std::string& name, const qualla::json& json) + : _name(name), _env(env) { + Timer start; + + _env->logger().debug(fmt::format("embedding-new: {} config {}", name, json.dump())); + + using qc = qualla::Config; + + // Parse prompt config + const qualla::json& pmt_conf = qc::optional(json, "prompt", {}); + _tags = qc::optional>(pmt_conf, "tags", {"", ""}); + + // Create the context first + _ctx = Context::create(*_env, name, qc::optional(json, "context", {})); + + // Create Tokenizer + fs::path tok_path = _env->path().models / qc::mandatory(json, "tokenizer"); + _tokenizer = Tokenizer::create(*_ctx, tok_path); + + // Create Engine + const qualla::json& eng_conf = qc::mandatory(json, "engine"); + _engine = Engine::create(*_ctx, eng_conf); + + // Truncation of input to context + _input_truncation = qc::optional(json, "truncate-input", false); + + using FF = Engine::Feature::Flags; + if (!_engine->supports(FF::OUTPUT_EMBEDDINGS)) + throw std::runtime_error("engine must output embeddings"); + + _kpis.init.update(start.elapsed_usec()); +} + +Embedding::~Embedding() {} + +bool Embedding::process(std::vector& tokens, std::vector& output) { + Timer start; + + State::clear(); + + size_t n = _engine->process(tokens, output, false); + if (!n) { + State::error("engine prompt processing failed"); + return false; + } + + _n_prompt += tokens.size(); + + // Clean the buffer before using + _output_dimensions.clear(); + + uint64_t output_size = 1; + // push number of tokens present in the result. 
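+    // The reported output shape is therefore [n_tokens, n_embd].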
+    _output_dimensions.push_back(n);
+    // push back the dimension of each embedding
+    _output_dimensions.push_back(_ctx->n_embd());
+
+    output_size = n * _ctx->n_embd();
+
+    output.resize(output_size);
+
+    _kpis.prompt.update(start.elapsed_usec());
+
+    // Log latest KPIs in a single line
+    _env->logger().post(Logger::KPIS, kpis().dump(" "));
+
+    return true;
+}
+
+bool Embedding::query(const std::string& str, std::vector<float>& output) {
+    std::string p_str;            // prompt string
+    std::vector<int32_t> p_vec;   // prompt tokens
+
+    p_vec.reserve(_ctx->n_ctx());
+
+    p_str = _tags[0] + str + _tags[1];
+
+    _env->logger().debug(fmt::format("embedding-query: {}", str));
+    _env->logger().debug(fmt::format("embedding-prompt: {}", p_str));
+
+    _n_queries++;
+
+    _tokenizer->encode(p_str, p_vec);
+
+    _env->logger().debug(fmt::format("embedding-tokens: {}", p_vec));
+
+    if (p_vec.size() > _ctx->n_ctx()) {  // Do not allow the input to exceed the context.
+        if (!_input_truncation) {
+            throw std::runtime_error("Input exceeds the context of the model.");
+        }
+        else {
+            p_vec.resize(_ctx->n_ctx());
+        }
+    }
+
+    return process(p_vec, output);
+}
+
+// Embedding KPIs helpers
+
+
+void Embedding::output_dimensions(std::vector<uint64_t>& outputDimensions) {
+    outputDimensions = _output_dimensions;
+}
+
+// Get latest KPIs
+Embedding::KPIs& Embedding::kpis() {
+    // Update TPS
+    if (_n_prompt) {
+        float t = _kpis.prompt.total_usec / _n_prompt;
+        _kpis.tps.prompt = 1000000.0 / (t ? t : 1000000.0);
+    }
+
+    // We could synthesize more KPIs from other layers (engine, sampler, etc)
+    return _kpis;
+}
+
+std::string Embedding::KPIs::dump(std::string_view sep) const {
+    return fmt::format(
+        "init:[{}]{}prompt:[{}]{} tps-prompt:{:.2f}",
+        init.dump(),
+        sep,
+        prompt.dump(),
+        sep,
+        tps.prompt
+    );
+}
+
+void Embedding::KPIs::reset() {
+    init.reset();
+    prompt.reset();
+    tps.prompt = 0.0;
+}
+
+// Create API
+
+std::unique_ptr<Embedding> Embedding::create(
+    std::shared_ptr<Env> env,
+    const std::string& name,
+    const qualla::json& conf
+) {
+    return std::make_unique<Embedding>(env, name, conf);
+}
+
+std::unique_ptr<Embedding> Embedding::create(
+    std::shared_ptr<Env> env,
+    const std::string& name,
+    std::istream& json_stream
+) {
+    return create(env, name, json::parse(json_stream));
+}
+
+std::unique_ptr<Embedding> Embedding::create(
+    std::shared_ptr<Env> env,
+    const std::string& name,
+    const fs::path& json_path
+) {
+    if (!fs::exists(json_path))
+        throw std::runtime_error(json_path.string() + ": file does not exist");
+    std::ifstream ifs(json_path);
+    return create(env, name, ifs);
+}
+
+} // namespace qualla
diff --git a/Genie/Genie/src/qualla/engine.cpp b/Genie/Genie/src/qualla/engine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2e2d3ce3db40f1e230259d7128b3c05790cb8543
--- /dev/null
+++ b/Genie/Genie/src/qualla/engine.cpp
@@ -0,0 +1,198 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+// +//============================================================================== + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace qualla { + +Engine::Engine(Context& ctx, const std::string& type, const qualla::json& conf) + : _type(type), _ctx(ctx), _env(ctx.env()) { + _env.logger().debug( + fmt::format("engine-new: {} ctx {} config {}", type, _ctx.name(), conf.dump()) + ); + + using qc = qualla::Config; + _role = qc::optional(conf, "role", "primary"); +} + +Engine::~Engine() {} + +size_t Engine::process( + const std::vector& tokens, + const std::vector& attention_map, + std::vector& output, + bool output_all +) { + _env.logger().error(fmt::format("{}-engine does not support attention_map", _type)); + return 0; +} + +size_t Engine::process(const std::vector& tokens) { + // Derived engines should overwrite this to avoid copying logits + std::vector logits; + return process(tokens, logits); +} + +size_t Engine::process( + std::vector& embeddings, + const std::vector& attention_map, + std::vector& output, + bool output_all +) { + _env.logger().error(fmt::format("{}-engine does not support embedding as input", _type)); + return 0; +} + +bool Engine::updateKV(size_t n_past) { + _env.logger().error(fmt::format("{}-engine does not support sync", _type)); + return false; +} + +bool Engine::updateKV(size_t n_past, const std::vector& selected) { + _env.logger().error(fmt::format("{}-engine does not support sync with selected", _type)); + return false; +} + +size_t Engine::restore(const std::string& name) { + _env.logger().error(fmt::format("{}-engine does not support restore", _type)); + return 0; +} + +bool Engine::save(const std::string& name) { + _env.logger().error(fmt::format("{}-engine does not support save", _type)); + return false; +} + +void Engine::reset() { + _env.logger().error(fmt::format("{}-engine does not support reset", _type)); +} + +bool Engine::load() { + _env.logger().error(fmt::format("{}-engine does not support dynamic load", _type)); + return 0; +} + +bool Engine::unload() { + _env.logger().error(fmt::format("{}-engine does not support dynamic unload", _type)); + return false; +} + +bool Engine::set(qualla::json data) { + _env.logger().error(fmt::format("{}-engine does not support set()", _type)); + return false; +} + +qualla::json Engine::get() { + _env.logger().error(fmt::format("{}-engine does not support get()", _type)); + return false; +} + +bool Engine::cacheEosEmbedding(std::vector& eosEmbedding) { + _env.logger().error(fmt::format("{}-engine does not support cache eos embedding", _type)); + return true; +} + +size_t Engine::getEmbeddingBufferSize() { + _env.logger().error(fmt::format("{}-engine does not support embedding vectors", _type)); + return 0; +} + +qualla::InputType Engine::getInputType(){ + return qualla::InputType::TOKENS; +} + +// Engine KPIs + +std::string Engine::KPIs::dump(std::string_view sep) const { + return fmt::format( + "load:[{}]{}process:[{}]{}update-kv:[{}]{}unload:[{}]", + load.dump(), + sep, + process.dump(), + sep, + update_kv.dump(), + sep, + unload.dump() + ); +} + +void Engine::KPIs::reset() { + load.reset(); + process.reset(); + update_kv.reset(); + unload.reset(); +} + +// Engine registry type string + creator function +using Registry = std::unordered_map; +static std::unique_ptr registry; + +void Engine::__register(const std::string& type, Creator func) { + if (!registry) registry = std::make_unique(); + + Registry& r = *registry; + r[type] = func; +} + 
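+// Illustrative sketch (not part of the original change): an engine makes itself
+// available by registering a creator under a type string, and Engine::create()
+// below resolves that string from the mandatory "type" key of the config.
+// "NullEngine" is a hypothetical class name used purely for illustration:
+//
+//     static OnLoad null_engine_reg([]() {
+//         Engine::__register("null", [](Context& ctx, const qualla::json& conf) {
+//             return (Engine*)new NullEngine(ctx, conf);
+//         });
+//     });
+//
+//     auto engine = Engine::create(ctx, json::parse(R"({ "type": "null" })"));
+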
+std::unique_ptr Engine::create(Context& ctx, const qualla::json& conf) { + using qc = qualla::Config; + + std::string type = qc::mandatory(conf, "type"); + + + if (!registry) throw std::runtime_error(type + ": engine not found"); + + Registry& r = *registry; + + + if (!r.contains(type)) throw std::runtime_error(type + ": engine not found"); + + + return std::unique_ptr(r[type](ctx, conf)); +} + +std::unique_ptr Engine::create(Context& ctx, std::istream& json_stream) { + return create(ctx, json::parse(json_stream)); +} + +std::unique_ptr Engine::create(Context& ctx, const std::string& json_str) { + return create(ctx, json::parse(json_str)); +} + +std::vector Engine::list() { + std::vector v; + if (!registry) return v; + + Registry& r = *registry; + + for (auto k : r) + v.push_back(k.first); + return v; +} + +bool Engine::applyLoraAdapter(std::string lora_adapter_name) { + _env.logger().error(fmt::format("{}-engine does not support LoraAdapter", _type)); + return false; +} +bool Engine::applyLoraStrength(std::string tensor_name, float tensor_val) { + _env.logger().error(fmt::format("{}-engine does not support setLoraStrength", _type)); + return false; +} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/lib.cpp b/Genie/Genie/src/qualla/engines/lib.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c6faaf917164472fe19d83559379f64d3c952628 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/lib.cpp @@ -0,0 +1,9 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +// Just a stub for building qualla::engines when no built-in engines are enabled diff --git a/Genie/Genie/src/qualla/engines/qnn-api/BackendExtensions.cpp b/Genie/Genie/src/qualla/engines/qnn-api/BackendExtensions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..023834fab96e0bd8b2f3898d4ef6aea7a4276a30 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/BackendExtensions.cpp @@ -0,0 +1,158 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#include "dlwrap.hpp" +#include "BackendExtensions.hpp" +#include "NetRunBackend.hpp" + +BackendExtensions::BackendExtensions( + BackendExtensionsConfigs backendExtensionsConfig, + void* backendLibHandle, + PerfProfile perfProfile, + std::shared_ptr clManager, + bool debug_qnn +) + : m_backendExtensionsLibPath(backendExtensionsConfig.sharedLibraryPath), + m_backendExtensionsConfigPath(backendExtensionsConfig.configFilePath), + m_backendInterface(nullptr), m_isNetRunBackendInterface(false), + m_createBackendInterfaceFn(nullptr), m_destroyBackendInterfaceFn(nullptr), + m_backendLibHandle(backendLibHandle), m_perfProfile(perfProfile), m_clManager(clManager), + m_debugQnn(debug_qnn) { + (void)m_perfProfile; +} + +BackendExtensions::~BackendExtensions() { + if (nullptr != m_backendInterface) { + if (m_isNetRunBackendInterface) { + QNN_DEBUG("Deleting NetRun Backend Interface"); + delete m_backendInterface; + } else { + if (nullptr != m_destroyBackendInterfaceFn) { + QNN_DEBUG("Destroying Backend Interface"); + m_destroyBackendInterfaceFn(m_backendInterface); + } + } + } +} + +bool BackendExtensions::loadFunctionPointers() { + + void* libHandle = dlopen(m_backendExtensionsLibPath.c_str(), RTLD_NOW | RTLD_LOCAL); + if (nullptr == libHandle) { + QNN_ERROR( + "Unable to load backend extensions lib: [%s]. dlerror(): [%s]", + m_backendExtensionsLibPath.c_str(), + dlerror() + ); + return false; + } + m_createBackendInterfaceFn = + (CreateBackendInterfaceFnType_t)dlsym(libHandle, "createBackendInterface"); + m_destroyBackendInterfaceFn = + (DestroyBackendInterfaceFnType_t)dlsym(libHandle, "destroyBackendInterface"); + if (nullptr == m_createBackendInterfaceFn || nullptr == m_destroyBackendInterfaceFn) { + QNN_ERROR("Unable to find symbols. 
dlerror(): [%s]", dlerror()); + return false; + } + + return true; +} + +void BackendExtensions::qnnLogCallback( + const char* fmt, + QnnLog_Level_t level, + uint64_t timestamp, + va_list args +) { + char buffer[1024] = ""; + const char* levelStr = ""; + switch (level) { + case QNN_LOG_LEVEL_ERROR: + levelStr = " ERROR "; + break; + case QNN_LOG_LEVEL_WARN: + levelStr = "WARNING"; + break; + case QNN_LOG_LEVEL_INFO: + levelStr = " INFO "; + break; + case QNN_LOG_LEVEL_DEBUG: + levelStr = " DEBUG "; + break; + case QNN_LOG_LEVEL_VERBOSE: + levelStr = "VERBOSE"; + break; + case QNN_LOG_LEVEL_MAX: + levelStr = "UNKNOWN"; + break; + } + + int pos = snprintf( + buffer, sizeof(buffer), "QNN: [%s] time=%lu:", levelStr, (unsigned long)timestamp + ); + vsnprintf(buffer + pos, sizeof(buffer) - pos, fmt, args); + printf("%s", buffer); +} + +bool BackendExtensions::initialize() { + + QNN_DEBUG("DEBUG: m_backendExtensionsLibPath=%s\n", m_backendExtensionsLibPath.c_str()); + QNN_DEBUG("DEBUG: m_backendExtensionsConfigPath=%s\n", m_backendExtensionsConfigPath.c_str()); + if (m_backendExtensionsLibPath.empty() && m_backendExtensionsConfigPath.empty()) { + QNN_WARN("No BackendExtensions lib provided; initializing NetRunBackend Interface"); + m_isNetRunBackendInterface = true; + m_backendInterface = new NetRunBackend(); + } else { + QNN_DEBUG("Loading supplied backend extensions lib."); + QNN_DEBUG("Backend extensions lib path: %s", m_backendExtensionsLibPath.c_str()); + if (m_backendExtensionsConfigPath.empty()) { + QNN_DEBUG("Backend extensions lib specified without a config file."); + } else { + QNN_DEBUG("Backend extensions config path: %s", m_backendExtensionsConfigPath.c_str()); + } + if (!loadFunctionPointers()) { + QNN_ERROR("Failed to load function pointers."); + return false; + } + if (nullptr != m_createBackendInterfaceFn) { + m_backendInterface = m_createBackendInterfaceFn(); + } + } + if (nullptr == m_backendInterface) { + QNN_ERROR("Unable to load backend extensions interface."); + return false; + } + if (m_debugQnn) { + if (!(m_backendInterface->setupLogging(BackendExtensions::qnnLogCallback, QNN_LOG_LEVEL_VERBOSE))) { + QNN_WARN("Unable to initialize logging in backend extensions."); + } + } + if (!m_backendInterface->initialize(m_backendLibHandle)) { + QNN_ERROR("Unable to initialize backend extensions interface."); + return false; + } + if (!m_backendInterface->setPerfProfile(m_perfProfile)) { + QNN_WARN("Unable to set perf profile in backend extensions interface."); + //return false; + } + if (!m_backendInterface->loadConfig(m_backendExtensionsConfigPath)) { + QNN_ERROR("Unable to load backend extensions interface config."); + return false; + } + + if ((m_clManager != nullptr) && !m_backendInterface->loadCommandLineArgs(m_clManager)) { + QNN_ERROR("Unable to load backend extensions' command line arguments."); + return false; + } + + return true; +} + +IBackend* BackendExtensions::interface() { + return m_backendInterface; +} diff --git a/Genie/Genie/src/qualla/engines/qnn-api/BackendExtensions.hpp b/Genie/Genie/src/qualla/engines/qnn-api/BackendExtensions.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a17b25dbd32315ae1c1d075b536c7009c5d24f42 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/BackendExtensions.hpp @@ -0,0 +1,62 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. 
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#pragma once
+
+#include <memory>
+
+#include "IBackend.hpp"
+#include "QnnConfig.hpp"
+#include "Log.hpp"
+
+// This is a wrapper class that handles resources/state related to the
+// backend extensions interface. It is used by the QnnNetRun library
+// to manage and call into an IBackend interface implementation.
+// Functionality present in this class:
+// 1. Receives the argument string for the backend_extensions
+//    argument from the front end and processes it to open the
+//    backend extensions library.
+// 2. Locates and stores symbols for creating and destroying the
+//    IBackend interface implementation.
+// 3. If there is no backend_extensions argument, this class creates
+//    the dummy IBackend implementation, aka NetRunBackend.
+// 4. Gives QnnNetRun access to the implementation itself through the
+//    interface() function.
+class BackendExtensions final {
+  public:
+    BackendExtensions(
+        BackendExtensionsConfigs backendExtensionsConfig,
+        void* backendLibHandle,
+        PerfProfile perfProfile,
+        std::shared_ptr<ICommandLineManager> clManager =
+            std::shared_ptr<ICommandLineManager>(nullptr),
+        bool debug_qnn = false
+    );
+    ~BackendExtensions();
+    bool initialize();
+    IBackend* interface();
+
+  private:
+    bool loadFunctionPointers();
+    std::string m_backendExtensionsLibPath;
+    std::string m_backendExtensionsConfigPath;
+    IBackend* m_backendInterface;
+    bool m_isNetRunBackendInterface;
+    CreateBackendInterfaceFnType_t m_createBackendInterfaceFn;
+    DestroyBackendInterfaceFnType_t m_destroyBackendInterfaceFn;
+    void* m_backendLibHandle;
+    PerfProfile m_perfProfile;
+    std::shared_ptr<ICommandLineManager> m_clManager;
+    bool m_debugQnn{false};
+    static void qnnLogCallback(
+        const char* fmt,
+        QnnLog_Level_t level,
+        uint64_t timestamp,
+        va_list args
+    );
+};
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/ClientBuffer.cpp b/Genie/Genie/src/qualla/engines/qnn-api/ClientBuffer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..deefb50fb474551bf95493e4d3caa2ddaafcd8ec
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/ClientBuffer.cpp
@@ -0,0 +1,122 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+// +//============================================================================== + +#include "ClientBuffer.hpp" +#include "QnnTypeMacros.hpp" + +void* ClientBuffer::getBuffer(Qnn_Tensor_t* tensor) { + if (!tensor) { + QNN_WARN("getBuffer: received a null pointer to a tensor"); + return nullptr; + } + return QNN_TENSOR_GET_CLIENT_BUF(tensor).data; +} + +size_t ClientBuffer::getBufferSize(Qnn_Tensor_t* tensor) { + if (!tensor) { + QNN_WARN("getBufferSize: received a null pointer to a tensor"); + return 0; + } + return QNN_TENSOR_GET_CLIENT_BUF(tensor).dataSize; +}; + +bool ClientBuffer::allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) { + if (!tensor) { + QNN_ERROR("Received nullptr for tensors"); + return false; + } + QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_RAW); + Qnn_ClientBuffer_t clientBuffer; + clientBuffer.data = malloc(tensorDataSize); + if (nullptr == clientBuffer.data) { + QNN_ERROR("mem alloc failed for clientBuffer.data"); + return false; + } + clientBuffer.dataSize = tensorDataSize; + QNN_TENSOR_SET_CLIENT_BUF(tensor, clientBuffer); + return true; +} + +bool ClientBuffer::freeTensorBuffer(Qnn_Tensor_t* tensor) { + if (!tensor) { + QNN_ERROR("Received nullptr for tensors"); + return false; + } + if (QNN_TENSOR_GET_CLIENT_BUF(tensor).data) { + if (m_sameMemoryFreeTensors.find(tensor) == m_sameMemoryFreeTensors.end()) { + free(QNN_TENSOR_GET_CLIENT_BUF(tensor).data); + } + QNN_TENSOR_SET_CLIENT_BUF(tensor, Qnn_ClientBuffer_t({nullptr, 0u})); + QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_UNDEFINED); + } + return true; +} + +bool ClientBuffer::useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) { + if (nullptr == dest || nullptr == src) { + QNN_ERROR("Received nullptr"); + return false; + } + if (false == freeTensorBuffer(dest)) { + return false; + } + + QNN_TENSOR_SET_MEM_TYPE(dest, QNN_TENSOR_GET_MEM_TYPE(src)); + QNN_TENSOR_SET_CLIENT_BUF(dest, QNN_TENSOR_GET_CLIENT_BUF(src)); + m_sameMemoryFreeTensors.insert(dest); + return true; +} + +bool ClientBuffer::useExternalMemory(Qnn_Tensor_t* dest, void* extMem) { + if (nullptr == dest || nullptr == extMem) { + QNN_ERROR("Received nullptr"); + return false; + } + + Qnn_ClientBuffer_t clientBuffer; + clientBuffer.data = extMem; + clientBuffer.dataSize = QNN_TENSOR_GET_CLIENT_BUF(dest).dataSize; + if (false == freeTensorBuffer(dest)) { + return false; + } + + QNN_TENSOR_SET_MEM_TYPE(dest, QNN_TENSORMEMTYPE_RAW); + QNN_TENSOR_SET_CLIENT_BUF(dest, clientBuffer); + m_sameMemoryFreeTensors.insert(dest); + return true; +} + +void* ClientBuffer::allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) { + return nullptr; +} + +bool ClientBuffer::mapFusedBufferOffset( + Qnn_Tensor_t* tensor, + size_t tensorDataSize, + int32_t fd, + uint32_t offset, + uint64_t totalBufferSize, + void* memPointer, + Qnn_ContextHandle_t contextHandle +) { + return false; +} + +bool ClientBuffer::deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) { + return false; +} + +void ClientBuffer::freeFusedBuffers() {} + +size_t ClientBuffer::getOffset(Qnn_Tensor_t* tensor) { + return 0; +} + +size_t ClientBuffer::getTotalBufferSize(Qnn_Tensor_t* tensor) { + return 0; +} \ No newline at end of file diff --git a/Genie/Genie/src/qualla/engines/qnn-api/ClientBuffer.hpp b/Genie/Genie/src/qualla/engines/qnn-api/ClientBuffer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6fa5b9fb081e817057abcc80a4fcc479f0ee8e1d --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/ClientBuffer.hpp @@ -0,0 +1,85 @@ 
+//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "IBufferAlloc.hpp" +#include "Log.hpp" +#include +#include + +class ClientBuffer final : public IBufferAlloc { + public: + ClientBuffer() {}; + + // Disable copy constructors, r-value referencing, etc + ClientBuffer(const ClientBuffer&) = delete; + + ClientBuffer& operator=(const ClientBuffer&) = delete; + + ClientBuffer(ClientBuffer&&) = delete; + + ClientBuffer& operator=(ClientBuffer&&) = delete; + + bool initialize() override { return true; }; + + void* getBuffer(Qnn_Tensor_t* tensor) override; + + int getFd(Qnn_Tensor_t* tensor) override { + QNN_WARN("getFd: This is not ION memory"); + return -1; + }; + + size_t getOffset(Qnn_Tensor_t* tensor) override; + size_t getBufferSize(Qnn_Tensor_t* tensor) override; + size_t getTotalBufferSize(Qnn_Tensor_t* tensor) override; + + bool allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) override; + + bool freeTensorBuffer(Qnn_Tensor_t* tensor) override; + + bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) override; + bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) override { return false; } + + bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) override; + + void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) override; + bool allocateBuffers( + const std::map>& allocs_per_chunk, + std::map>& tensor_offsets + ) override { + return false; + }; + + bool mapFusedBufferOffset( + Qnn_Tensor_t* tensor, + size_t tensorDataSize, + int32_t fd, + uint32_t offset, + uint64_t totalBufferSize, + void* memPointer, + Qnn_ContextHandle_t contextHandle + ) override; + bool deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) override; + void freeFusedBuffers() override; + + bool mapFusedBufferOffset( + Qnn_Tensor_t* tensor, + int alloc_idx, + size_t offset, + Qnn_ContextHandle_t ctx, + size_t size + ) override { + return false; + } + + virtual ~ClientBuffer() {}; + + private: + std::unordered_set m_sameMemoryFreeTensors; +}; diff --git a/Genie/Genie/src/qualla/engines/qnn-api/IBackend.hpp b/Genie/Genie/src/qualla/engines/qnn-api/IBackend.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1f6bf00a8c3a1bfd35b74e8d878e9927347ef67a --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/IBackend.hpp @@ -0,0 +1,156 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#pragma once + +#include +#include "ICommandLineManager.hpp" +#include "QnnBackend.h" +#include "QnnContext.h" +#include "QnnGraph.h" +#include "QnnLog.h" +#include "QnnTypeDef.hpp" +#include "QnnProfile.h" +#include "QnnDevice.h" + +// Compile-time definition to check for QNN SDK features using the QNN API version +#define QUALLA_QNN_API_VERSION \ + (QNN_API_VERSION_MAJOR * 10000 + QNN_API_VERSION_MINOR * 100 + QNN_API_VERSION_PATCH) + +const uint32_t g_profilingLevelNotSet = 0; + +enum class PerfProfile { + LOW_BALANCED, + BALANCED, + DEFAULT, + HIGH_PERFORMANCE, + SUSTAINED_HIGH_PERFORMANCE, + BURST, + EXTREME_POWER_SAVER, + LOW_POWER_SAVER, + POWER_SAVER, + HIGH_POWER_SAVER, + SYSTEM_SETTINGS, + NO_USER_INPUT, + CUSTOM, + INVALID +}; + +// This is the interface that enables backend specific extensions in qnn-net-run. +// It is designed as hooks in the timeline of various events in NetRun. +// Backends that intend to implement custom features through qnn-net-run will have +// to implement this interface and add functionality in appropriate methods depending +// on where/when the custom functionality needs to be exercised. +// These functions/hooks will be called through the IBackend interface from within +// qnn-net-run wherever necessary. +class IBackend { + public: + virtual ~IBackend() {} + + virtual bool setupLogging(QnnLog_Callback_t callback, QnnLog_Level_t maxLogLevel) = 0; + + virtual bool initialize(void* backendLibHandle) = 0; + + virtual bool setPerfProfile(PerfProfile perfProfile) = 0; + + virtual QnnProfile_Level_t getProfilingLevel() = 0; + + virtual bool loadConfig(std::string configFile) = 0; + + virtual bool loadCommandLineArgs(std::shared_ptr clManager) = 0; + + virtual bool beforeBackendInitialize( + QnnBackend_Config_t*** customConfigs, + uint32_t* configCount + ) = 0; + + virtual bool afterBackendInitialize() = 0; + + virtual bool beforeContextCreate( + QnnContext_Config_t*** customConfigs, + uint32_t* configCount + ) = 0; + + virtual bool afterContextCreate() = 0; + + virtual bool beforeComposeGraphs( + GraphConfigInfo_t*** customGraphConfigs, + uint32_t* graphCount + ) = 0; + + virtual bool afterComposeGraphs() = 0; + +#if QUALLA_QNN_API_VERSION >= 21700 + virtual bool beforeGraphFinalizeUpdateConfig( + const char* graphName, + Qnn_GraphHandle_t graphHandle, + QnnGraph_Config_t*** customConfigs, + uint32_t* configCount + ) = 0; +#endif + + virtual bool beforeGraphFinalize() = 0; + + virtual bool afterGraphFinalize() = 0; + + virtual bool beforeRegisterOpPackages() = 0; + + virtual bool afterRegisterOpPackages() = 0; + + virtual bool beforeExecute( + const char* graphName, + QnnGraph_Config_t*** customConfigs, + uint32_t* configCount + ) = 0; + + virtual bool afterExecute() = 0; + + virtual bool beforeContextFree() = 0; + + virtual bool afterContextFree() = 0; + + virtual bool beforeBackendTerminate() = 0; + + virtual bool afterBackendTerminate() = 0; + + virtual bool beforeCreateFromBinary( + QnnContext_Config_t*** customConfigs, + uint32_t* configCount + ) = 0; + + virtual bool afterCreateFromBinary() = 0; + +#if QUALLA_QNN_API_VERSION >= 21700 + virtual bool beforeCreateContextsFromBinaryList( + std::map>* + contextKeyToCustomConfigsMap, + QnnContext_Config_t*** commonCustomConfigs, + uint32_t* commonConfigCount + ) = 0; + + virtual bool afterCreateContextsFromBinaryList() = 0; +#endif + + virtual bool beforeCreateDevice(QnnDevice_Config_t*** deviceConfigs, uint32_t* 
configCount) = 0; + + virtual bool afterCreateDevice() = 0; + + virtual bool beforeFreeDevice() = 0; + + virtual bool afterFreeDevice() = 0; +}; + +// These are the function types that the backend extensions shared library is +// expected to expose. The first function helps NetRun obtain a valid implementation +// of IBackend interface and the second is used to destroy the same interface at the end. +// The function names themselves are expected to be these strings: +// 1. "createBackendInterface" +// 2. "destroyBackendInterface" +// These functions need to be tagged with extern "C" and their symbols need to be exposed. +typedef IBackend* (*CreateBackendInterfaceFnType_t)(); +typedef void (*DestroyBackendInterfaceFnType_t)(IBackend*); diff --git a/Genie/Genie/src/qualla/engines/qnn-api/IBufferAlloc.hpp b/Genie/Genie/src/qualla/engines/qnn-api/IBufferAlloc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6eac516d96a69ce8d07984b7a1ba2899c55997ae --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/IBufferAlloc.hpp @@ -0,0 +1,56 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once +#include "QnnTypes.h" +#include +#include +#include +#include +#include + +class IBufferAlloc { + public: + virtual ~IBufferAlloc() {} + IBufferAlloc() {} + virtual bool initialize() = 0; + virtual void* getBuffer(Qnn_Tensor_t* tensor) = 0; + virtual int getFd(Qnn_Tensor_t* tensor) = 0; + virtual size_t getOffset(Qnn_Tensor_t* tensor) = 0; + virtual size_t getBufferSize(Qnn_Tensor_t* tensor) = 0; + virtual size_t getTotalBufferSize(Qnn_Tensor_t* tensor) = 0; + virtual bool allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) = 0; + virtual bool freeTensorBuffer(Qnn_Tensor_t* tensor) = 0; + virtual bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) = 0; + virtual bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) = 0; + virtual bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) = 0; + virtual void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) = 0; + virtual bool allocateBuffers( + const std::map>& allocs_per_chunk, + std::map>& tensor_offsets + ) = 0; + virtual bool mapFusedBufferOffset( + Qnn_Tensor_t* tensor, + size_t tensorDataSize, + int32_t fd, + uint32_t offset, + uint64_t totalBufferSize, + void* memPointer, + Qnn_ContextHandle_t contextHandle + ) = 0; + virtual bool mapFusedBufferOffset( + Qnn_Tensor_t* tensor, + int alloc_idx, + size_t offset, + Qnn_ContextHandle_t ctx, + size_t size + ) = 0; + + virtual bool deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) = 0; + virtual void freeFusedBuffers() = 0; +}; \ No newline at end of file diff --git a/Genie/Genie/src/qualla/engines/qnn-api/ICommandLineManager.hpp b/Genie/Genie/src/qualla/engines/qnn-api/ICommandLineManager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..13150c8b142aab8bcd63b6649104ac04ac1da84f --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/ICommandLineManager.hpp @@ -0,0 +1,95 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#pragma once + +#include +#include +#include +#include +#include + +class ICommandLineManager { + public: + enum class Error { SUCCESS, PARSE_FAILURE, UNUSED_ARGUMENTS, OVER_SUBSCRIBED_ARGUMENTS }; + + using ValueList_t = std::vector>; + + /** + * @brief Parses provided command line arguments into key value pairs + * + * @param[in] argc Number of char* arguments in argv + * + * @param[in] argv Pointer to first element of null terminated character arrays + * + * @return Error code: + * - SUCCESS: provided command line arguments match expected format: --key=value, --key + * - PARSE_FAILURE: The provided command line arguments do not match expected format + * + */ + virtual Error parseClArgs(size_t argc, char** argv) = 0; + + /** + * @brief Provides passed values for requested key if available + * + * @param[in] key Key string of option + * + * @return (False, empty) if key is not an available argument + * + */ + virtual std::tuple serveArg(const std::string& key) = 0; + + /** + * @brief Checks whether any provided commandline arguments remain unserved + * + * @return True if unconsumed arguments remain, False otherwise + */ + virtual bool allArgumentsServed() const = 0; + + /** + * @brief Validates command line arguments were correctly utilized + * + * @return Error code: + * - SUCCESS: provided command line arguments were utilized following implementations + * policy + * - UNUSED_ARGUMENTS: Some arguments passed were not consumed + * - OVER_SUBSCRIBED_ARGUMENTS: Some arguments were requested by multiple times + * + */ + virtual Error validateUsage() = 0; + + virtual ~ICommandLineManager() = default; + + static bool isKey(const std::string& arg) { + return (arg.length() > keyPrefix().length()) && (arg.find(keyPrefix()) == 0) && + std::isalpha(arg.at(keyPrefix().length())); + } + + static Error parseKey(const std::string& arg, std::string& keyOut) { + if (!isKey(arg)) { + return Error::PARSE_FAILURE; + } + + auto valueSplit = arg.find(keyValueSplit()); + keyOut = valueSplit != arg.npos ? arg.substr(0, valueSplit) : arg; + return Error::SUCCESS; + } + + static Error parseValue(const std::string& arg, std::string& valueOut) { + auto valueSplit = arg.find(keyValueSplit()); + if (valueSplit == arg.npos || valueSplit == arg.length() - 1) { + return Error::PARSE_FAILURE; + } + valueOut = arg.substr(valueSplit + 1); + return Error::SUCCESS; + } + + private: + static const std::string keyPrefix() { return "--"; }; + static char keyValueSplit() { return '='; }; +}; diff --git a/Genie/Genie/src/qualla/engines/qnn-api/IOTensor.cpp b/Genie/Genie/src/qualla/engines/qnn-api/IOTensor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9ad51cb4a6b8ab777cf92c928fa19217f653a9a2 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/IOTensor.cpp @@ -0,0 +1,382 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#include +#include +#include + +#include "ClientBuffer.hpp" +#include "IBufferAlloc.hpp" +#include "IOTensor.hpp" +#include "RpcMem.hpp" +#include "QnnTypeMacros.hpp" + +#ifdef _WIN32 + #define __strdup _strdup +#else + #define __strdup strdup +#endif + +IOTensor::IOTensor(BufferAlloc bufferAllocIn, QNN_INTERFACE_VER_TYPE* qnnInterface) + : m_bufferAlloc(bufferAllocIn), m_qnnInterface(qnnInterface), + m_bufferManager(new ClientBuffer()) {} + +bool IOTensor::initialize(Qnn_ContextHandle_t contextHandle) { + if (m_bufferAlloc == BufferAlloc::SHARED_BUFFER) { + m_bufferManager = std::unique_ptr(new RpcMem(contextHandle, m_qnnInterface)); + } + + if (true != m_bufferManager->initialize()) { + QNN_ERROR("Failed to initialize buffer manager"); + return false; + } + + return true; +} + +IOTensor::~IOTensor() { + if (m_bufferAlloc == BufferAlloc::SHARED_BUFFER) { + m_bufferManager->freeFusedBuffers(); + } +} + +// Setup details for Qnn_Tensor_t for execution +// based on information in TensorWrapper provided by model.so. +bool IOTensor::setupTensors( + Qnn_Tensor_t** tensors, + std::unordered_map& tensorNameToTensorPointer, + uint32_t tensorCount, + TensorWrapper* tensorWrappers, + std::unordered_map& tensorsSize, + Qnn_ContextHandle_t contextHandle, + bool skipBufferAllocation +) { + + if (nullptr == tensorWrappers) { + QNN_ERROR("tensorWrappers is nullptr"); + return false; + } + if (0 == tensorCount) { + QNN_DEBUG("tensor count is 0. Nothing to setup."); + return true; + } + + *tensors = (Qnn_Tensor_t*)calloc(1, tensorCount * sizeof(Qnn_Tensor_t)); + if (nullptr == *tensors) { + QNN_ERROR("mem alloc failed for *tensors"); + return false; + } + + auto returnStatus = true; + + uint64_t totalBufferSize = 0; + void* memPointer = nullptr; + int32_t fd = -1; + if (m_bufferAlloc == BufferAlloc::SHARED_BUFFER) { + // Calculate the total size of the tensors + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + auto wrapperTensorName = + std::string(GET_TENSOR_WRAPPER_NAME(tensorWrappers[tensorIdx])); + totalBufferSize += tensorsSize[wrapperTensorName]; + } + QNN_DEBUG("Calculated total size %lu", totalBufferSize); + + if (!skipBufferAllocation) { + // Allocate the buffer of this size + memPointer = m_bufferManager->allocateTensorFusedBuffer(totalBufferSize, &fd); + if (memPointer) { + QNN_DEBUG( + "Successfully allocated a buffer of size %lu, pointer %p, fd %d", + (unsigned long)totalBufferSize, + memPointer, + fd + ); + } else { + QNN_ERROR( + "Not able to allocate buffer of size %lu", (unsigned long)totalBufferSize + ); + return false; + } + } + } + + uint64_t offset = 0; + + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + Qnn_Tensor_t wrapperTensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrappers[tensorIdx]); + auto wrapperTensorName = std::string(GET_TENSOR_WRAPPER_NAME(tensorWrappers[tensorIdx])); + if (true == returnStatus) { + (*tensors)[tensorIdx] = QNN_TENSOR_INIT; + returnStatus = deepCopyQnnTensorInfo(((*tensors) + tensorIdx), &wrapperTensor); + } + if (true == returnStatus) { + size_t tensorDataSize = tensorsSize[wrapperTensorName]; + if (m_bufferAlloc == BufferAlloc::SHARED_BUFFER) { + if (!skipBufferAllocation) { + returnStatus = m_bufferManager->mapFusedBufferOffset( + ((*tensors) + tensorIdx), + tensorDataSize, + fd, + offset, + totalBufferSize, + memPointer, + contextHandle + ); + offset += tensorDataSize; + } + } else { + returnStatus = 
m_bufferManager->allocateTensorBuffer( + ((*tensors) + tensorIdx), tensorDataSize + ); + } + } + if (true != returnStatus) { + QNN_ERROR("Failure in setupTensors, cleaning up resources"); + tearDownTensors(*tensors, tensorIdx); + *tensors = nullptr; + QNN_ERROR("Failure in setupTensors, done cleaning up resources"); + return false; + } else { + tensorNameToTensorPointer.insert({wrapperTensorName, ((*tensors) + tensorIdx)}); + // QNN_DEBUG("allocateBuffer successful"); + } + } + + return returnStatus; +} + +// Setup details for all input tensors for graph execution. +bool IOTensor::setupInputTensors( + Qnn_Tensor_t** inputs, + std::unordered_map& tensorNameToTensorPointer, + const GraphInfo_t& graphInfo, + std::unordered_map& inputTensorsSize, + Qnn_ContextHandle_t contextHandle, + bool skipBufferAllocation +) { + + if (true != setupTensors( + inputs, + tensorNameToTensorPointer, + graphInfo.numInputTensors, + (graphInfo.inputTensors), + inputTensorsSize, + contextHandle, + skipBufferAllocation + )) { + QNN_ERROR("Failure in setupInputTensors, cleaning up resources"); + if (nullptr != *inputs) { + QNN_DEBUG("cleaning up input tensors"); + tearDownTensors(*inputs, graphInfo.numInputTensors); + *inputs = nullptr; + } + QNN_ERROR("Failure in setupInputTensors, done cleaning up resources"); + + return false; + } + + return true; +} + +// Setup details for all output tensors for graph execution. +bool IOTensor::setupOutputTensors( + Qnn_Tensor_t** outputs, + std::unordered_map& tensorNameToTensorPointer, + const GraphInfo_t& graphInfo, + std::unordered_map& outputTensorsSize, + Qnn_ContextHandle_t contextHandle, + bool skipBufferAllocation +) { + + if (true != setupTensors( + outputs, + tensorNameToTensorPointer, + graphInfo.numOutputTensors, + (graphInfo.outputTensors), + outputTensorsSize, + contextHandle, + skipBufferAllocation + )) { + QNN_ERROR("Failure in setupOutputTensors, cleaning up resources"); + if (nullptr != *outputs) { + QNN_DEBUG("cleaning up output tensors"); + tearDownTensors(*outputs, graphInfo.numOutputTensors); + *outputs = nullptr; + } + QNN_ERROR("Failure in setupOutputTensors, done cleaning up resources"); + + return false; + } + + return true; +} + +bool IOTensor::mapFusedBufferOffset( + GraphInfo_t* graph_info, + Qnn_ContextHandle_t context_handle, + const std::map>& graph_allocs +) { + std::lock_guard lk(_tmp_lock); // READ COMMENT IN IOTensor.hpp _tmp_lock + + bool ret = true; + for (const bool mode : {true, false}) { + TensorWrapper* tensor_bank = (mode) ? graph_info->inputTensors : graph_info->outputTensors; + uint32_t num_tensors = (mode) ? graph_info->numInputTensors : graph_info->numOutputTensors; + + for (size_t tidx = 0; tidx < num_tensors; tidx++) { + TensorWrapper& tensor_wrapper = tensor_bank[tidx]; + + Qnn_Tensor_t* tensor = &GET_TENSOR_WRAPPER_TENSOR(tensor_wrapper); + std::string tensor_name = std::string(GET_TENSOR_WRAPPER_NAME(tensor_wrapper)); + + if (!graph_allocs.contains(tensor_name)) continue; + auto& [alloc_idx, offset, size] = graph_allocs.at(tensor_name); + ret &= m_bufferManager->mapFusedBufferOffset( + tensor, alloc_idx, offset, context_handle, size + ); + } + } + + return ret; +} + +// Clean up all tensors related data after execution. 
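The SHARED_BUFFER path above sizes one fused allocation up front and then walks the tensors, handing each a disjoint offset into it. Before the teardown helpers below, here is a minimal standalone sketch of that packing step; `packTensors` and `Slice` are illustrative names, not part of this patch:

```cpp
#include <cstdint>
#include <map>
#include <string>

struct Slice { uint64_t offset; uint64_t size; };

// Assign every tensor a contiguous slice of one fused buffer, mirroring the
// totalBufferSize/offset accumulation in setupTensors above. (A real layout
// would preserve the graph's tensor order; std::map sorts by name.)
std::map<std::string, Slice> packTensors(
    const std::map<std::string, uint64_t>& tensorSizes, uint64_t& totalSize) {
    std::map<std::string, Slice> layout;
    totalSize = 0;
    for (const auto& [name, size] : tensorSizes) {
        layout[name] = {totalSize, size};  // this tensor starts at the running total
        totalSize += size;                 // the next tensor begins where this one ends
    }
    return layout;
}
```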
+bool IOTensor::tearDownTensors(Qnn_Tensor_t* tensors, uint32_t tensorCount) { + + if (nullptr != tensors) { + QNN_DEBUG("cleaning up resources for tensors"); + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + // QNN_DEBUG("freeing resources for tensor: %zu", tensorIdx); + if (nullptr != QNN_TENSOR_GET_DIMENSIONS(&tensors[tensorIdx])) { + // QNN_DEBUG("freeing maxDimensions"); + free(QNN_TENSOR_GET_DIMENSIONS(&tensors[tensorIdx])); + } + if (m_bufferAlloc == BufferAlloc::SHARED_BUFFER) { + m_bufferManager->deregisterTensorFusedBuffer(&(tensors[tensorIdx])); + } else { + m_bufferManager->freeTensorBuffer(&(tensors[tensorIdx])); + } + m_freeTensorsPointerSet.insert(&(tensors[tensorIdx])); + } + free(tensors); + tensors = nullptr; + } + + return true; +} + +// Clean up all tensors after execution. +bool IOTensor::tearDownTensors(std::vector& tensors, uint32_t numTensors) { + + for (Qnn_Tensor_t* tensor : tensors) { + tearDownTensors(tensor, numTensors); + } + + return true; +} + +bool IOTensor::tearDownTensors(std::vector& tensors) { + return tearDownTensors(tensors.data(), tensors.size()); +} + +// Clean up all tensors after execution. +bool IOTensor::tearDownTensors( + std::unordered_map& tensors, + std::unordered_map& tensorCountMap +) { + + for (auto& tensor : tensors) { + tearDownTensors(tensor.second, tensorCountMap[tensor.first]); + } + + return true; +} + +// Clean up all tensors after execution. +bool IOTensor::tearDownTensors( + std::vector>& tensors, + std::unordered_map& tensorCountMap +) { + + for (auto& tensor : tensors) { + tearDownTensors(tensor, tensorCountMap); + } + + return true; +} + +bool IOTensor::deepCopyQnnTensorInfo(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) { + + if (nullptr == dest || nullptr == src) { + QNN_ERROR("Received nullptr"); + return false; + } + + // set tensor.version before using QNN_TENSOR_SET macros, as they require the version to be set + // to correctly assign values + dest->version = src->version; + const char* tensorName = QNN_TENSOR_GET_NAME(src); + if (!tensorName) { + QNN_TENSOR_SET_NAME(dest, nullptr); + } else { + QNN_TENSOR_SET_NAME(dest, __strdup(tensorName)); + } + QNN_TENSOR_SET_ID(dest, QNN_TENSOR_GET_ID(src)); + QNN_TENSOR_SET_TYPE(dest, QNN_TENSOR_GET_TYPE(src)); + QNN_TENSOR_SET_DATA_FORMAT(dest, QNN_TENSOR_GET_DATA_FORMAT(src)); + QNN_TENSOR_SET_DATA_TYPE(dest, QNN_TENSOR_GET_DATA_TYPE(src)); + Qnn_QuantizeParams_t qParams = QNN_QUANTIZE_PARAMS_INIT; + qParams.encodingDefinition = QNN_TENSOR_GET_QUANT_PARAMS(src).encodingDefinition; + qParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED; + if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) { + qParams.quantizationEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding; + qParams.scaleOffsetEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).scaleOffsetEncoding; + } else if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { + qParams.quantizationEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding; + qParams.axisScaleOffsetEncoding.axis = + QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.axis; + qParams.axisScaleOffsetEncoding.numScaleOffsets = + QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets; + if (QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets > 0) { + qParams.axisScaleOffsetEncoding.scaleOffset = (Qnn_ScaleOffset_t*)malloc( + 
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets * + sizeof(Qnn_ScaleOffset_t) + ); + if (qParams.axisScaleOffsetEncoding.scaleOffset) { + for (size_t idx = 0; + idx < QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets; + idx++) { + qParams.axisScaleOffsetEncoding.scaleOffset[idx].scale = + QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.scaleOffset[idx] + .scale; + qParams.axisScaleOffsetEncoding.scaleOffset[idx].offset = + QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.scaleOffset[idx] + .offset; + } + } + } + } + QNN_TENSOR_SET_QUANT_PARAMS(dest, qParams); + QNN_TENSOR_SET_RANK(dest, QNN_TENSOR_GET_RANK(src)); + QNN_TENSOR_SET_DIMENSIONS(dest, nullptr); + if (QNN_TENSOR_GET_RANK(src) > 0) { + QNN_TENSOR_SET_DIMENSIONS( + dest, (uint32_t*)malloc(QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t)) + ); + if (QNN_TENSOR_GET_DIMENSIONS(dest)) { + memcpy(QNN_TENSOR_GET_DIMENSIONS(dest), + QNN_TENSOR_GET_DIMENSIONS(src), + QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t)); + } + } + + return true; +} diff --git a/Genie/Genie/src/qualla/engines/qnn-api/IOTensor.hpp b/Genie/Genie/src/qualla/engines/qnn-api/IOTensor.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4212bd8af669b9f27bc8a2197f6a7735cc76f066 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/IOTensor.hpp @@ -0,0 +1,170 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "IBufferAlloc.hpp" +#include "QnnTypeDef.hpp" +#include "Log.hpp" +#include "QnnBackend.h" +#include "QnnCommon.h" +#include "QnnContext.h" +#include "QnnGraph.h" +#include "QnnInterface.h" +#include "QnnProperty.h" +#include "QnnTensor.h" +#include "QnnTypes.h" +enum class BufferAlloc { + DEFAULT, // malloc based allocator + SHARED_BUFFER, // shared buffer allocator; actual allocator depends on the platform + INVALID +}; +class IBufferAlloc; +class IOTensor { + public: + IOTensor( + BufferAlloc bufferAllocIn = BufferAlloc::DEFAULT, + QNN_INTERFACE_VER_TYPE* qnnInterface = nullptr + ); + + ~IOTensor(); + + bool initialize(Qnn_ContextHandle_t contextHandle = nullptr); + + bool setupInputTensors( + Qnn_Tensor_t** inputs, + std::unordered_map& tensorNameToTensorPointer, + const GraphInfo_t& graphInfo, + std::unordered_map& inputTensorsSize, + Qnn_ContextHandle_t contextHandle, + bool skipBufferAllocation = false + ); + + bool setupOutputTensors( + Qnn_Tensor_t** outputs, + std::unordered_map& tensorNameToTensorPointer, + const GraphInfo_t& graphInfo, + std::unordered_map& outputTensorsSize, + Qnn_ContextHandle_t contextHandle, + bool skipBufferAllocation = false + ); + + bool tearDownTensors(Qnn_Tensor_t* tensors, uint32_t tensorCount); + + bool tearDownTensors(std::vector& tensors, uint32_t tensorCount); + bool tearDownTensors(std::vector& tensors); + bool tearDownTensors( + std::unordered_map& tensors, + std::unordered_map& tensorCountMap + ); + bool tearDownTensors( + std::vector>& tensors, + std::unordered_map& tensorCountMap + ); + + bool tearDownTensors(const GraphInfo_t* graph_info) { + bool status = true; + if (!tearDownTensors(graph_info->inputTensors, graph_info->numInputTensors)) { + status = false; + 
QNN_ERROR("Failed to tear down input tensors for graph %s", graph_info->graphName); + } + + if (!tearDownTensors(graph_info->outputTensors, graph_info->numOutputTensors)) { + status = false; + QNN_ERROR("Failed to tear down output tensors for graph %s", graph_info->graphName); + } + return status; + } + + void* getBuffer(Qnn_Tensor_t* tensor) { return m_bufferManager->getBuffer(tensor); }; + + int getFd(Qnn_Tensor_t* tensor) { return m_bufferManager->getFd(tensor); }; + + size_t getOffset(Qnn_Tensor_t* tensor) { return m_bufferManager->getOffset(tensor); }; + + size_t getBufferSize(Qnn_Tensor_t* tensor) { return m_bufferManager->getBufferSize(tensor); }; + + size_t getTotalBufferSize(Qnn_Tensor_t* tensor) { + return m_bufferManager->getTotalBufferSize(tensor); + } + + void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) { + return m_bufferManager->allocateTensorFusedBuffer(bufferSize, fd); + } + + bool allocateBuffers( + const std::map>& allocs_per_chunk, + std::map>& tensor_offsets + ) { + return m_bufferManager->allocateBuffers(allocs_per_chunk, tensor_offsets); + } + + bool mapFusedBufferOffset( + Qnn_Tensor_t* tensor, + size_t tensorDataSize, + int32_t fd, + uint32_t offset, + uint64_t totalBufferSize, + void* memPointer, + Qnn_ContextHandle_t contextHandle + ) { + return m_bufferManager->mapFusedBufferOffset( + tensor, tensorDataSize, fd, offset, totalBufferSize, memPointer, contextHandle + ); + } + + bool mapFusedBufferOffset( + GraphInfo_t* graph_info, + Qnn_ContextHandle_t context_handle, + const std::map>& graph_allocs + ); + + bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) { + return m_bufferManager->useSameMemory(dest, src); + } + + bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) { + return m_bufferManager->useSameMemory(dest, src, offset); + } + + bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) { + return m_bufferManager->useExternalMemory(dest, extMem); + } + + BufferAlloc getBufferAllocType() { return m_bufferAlloc; } + + std::unordered_set& getFreeTensorsPointerSet() { return m_freeTensorsPointerSet; } + + private: + BufferAlloc m_bufferAlloc; + QNN_INTERFACE_VER_TYPE* m_qnnInterface; + std::unique_ptr m_bufferManager; + std::unordered_set m_freeTensorsPointerSet; + + // There seems to be a race condition in mapFusedBufferOffset because we are + // calling it from multiple threads. Maybe memRegister/memDeRegister is not thread-safe + // Until I figure this out, adding a temporary lock here. TODO: Fix and remove this! + std::mutex _tmp_lock; + + bool deepCopyQnnTensorInfo(Qnn_Tensor_t* dest, Qnn_Tensor_t* src); + bool setupTensors( + Qnn_Tensor_t** tensors, + std::unordered_map& tensorNameToTensorPointer, + uint32_t tensorCount, + TensorWrapper* tensorsInfo, + std::unordered_map& tensorsSize, + Qnn_ContextHandle_t contextHandle, + bool skipBufferAllocation = false + ); +}; \ No newline at end of file diff --git a/Genie/Genie/src/qualla/engines/qnn-api/Log.hpp b/Genie/Genie/src/qualla/engines/qnn-api/Log.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4b551f35140d20aa94674e7827e084f81ac1dc98 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/Log.hpp @@ -0,0 +1,24 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+//
+//==============================================================================
+
+#pragma once
+
+#include <cstdio>
+
+// FIXME: Use logger from qualla::Env
+
+#define QNN_INFO(fmt, ...) fprintf(stderr, "[INFO] " fmt "\n", ##__VA_ARGS__)
+#define QNN_ERROR(fmt, ...) fprintf(stderr, "[ERROR] " fmt "\n", ##__VA_ARGS__)
+#define QNN_WARN(fmt, ...) fprintf(stderr, "[WARN] " fmt "\n", ##__VA_ARGS__)
+
+#if 0
+    // #define NSP_LOG_LEVEL 2
+    #define QNN_DEBUG(fmt, ...) fprintf(stderr, "[DEBUG] " fmt "\n", ##__VA_ARGS__)
+#else
+    #define QNN_DEBUG(fmt, ...)
+#endif
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/NetRunBackend.hpp b/Genie/Genie/src/qualla/engines/qnn-api/NetRunBackend.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ebb7e6eb111f11404e900523d324c0a55b00881b
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/NetRunBackend.hpp
@@ -0,0 +1,173 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#pragma once
+
+#include <memory>
+
+#include "ICommandLineManager.hpp"
+#include "IBackend.hpp"
+
+// This is an implementation of the IBackend interface within qnn-net-run.
+// NetRunBackend provides a dummy implementation of IBackend, as a concrete
+// implementation is needed in case there is no backend extensions library
+// supplied by the user.
+// It is built as part of the QnnNetRun library and is used when there is no
+// user-supplied backend extensions implementation.
+class NetRunBackend final : public IBackend {
+  public:
+    NetRunBackend() {}
+
+    virtual ~NetRunBackend() {}
+
+    virtual bool setupLogging(QnnLog_Callback_t callback, QnnLog_Level_t maxLogLevel) override {
+        ignore(callback);
+        ignore(maxLogLevel);
+        return true;
+    }
+
+    virtual bool initialize(void* backendLibHandle) override {
+        ignore(backendLibHandle);
+        return true;
+    }
+
+    virtual bool setPerfProfile(PerfProfile perfProfile) override {
+        ignore(perfProfile);
+        return true;
+    }
+
+    virtual QnnProfile_Level_t getProfilingLevel() override { return g_profilingLevelNotSet; }
+
+    virtual bool loadConfig(std::string configFile) override {
+        ignore(configFile);
+        return true;
+    }
+
+    virtual bool loadCommandLineArgs(std::shared_ptr<ICommandLineManager> clManager) override {
+        ignore(clManager);
+        return true;
+    }
+
+    virtual bool beforeBackendInitialize(
+        QnnBackend_Config_t*** customConfigs,
+        uint32_t* configCount
+    ) override {
+        ignore(customConfigs);
+        ignore(configCount);
+        return true;
+    }
+
+    virtual bool afterBackendInitialize() override { return true; }
+
+    virtual bool beforeContextCreate(QnnContext_Config_t*** customConfigs, uint32_t* configCount)
+        override {
+        ignore(customConfigs);
+        ignore(configCount);
+        return true;
+    }
+
+    virtual bool afterContextCreate() override { return true; }
+
+    virtual bool beforeComposeGraphs(GraphConfigInfo_t*** customGraphConfigs, uint32_t* graphCount)
+        override {
+        ignore(customGraphConfigs);
+        ignore(graphCount);
+        return true;
+    }
+
+    virtual bool afterComposeGraphs() override { return true; }
+
+#if QUALLA_QNN_API_VERSION >= 21700
+    virtual bool beforeGraphFinalizeUpdateConfig(
+        const char* graphName,
+        Qnn_GraphHandle_t graphHandle,
+        QnnGraph_Config_t*** customConfigs,
+        uint32_t* configCount
+    ) override {
+        ignore(graphName);
+        ignore(graphHandle);
+        ignore(customConfigs);
+        ignore(configCount);
+        return true;
+    }
+#endif
+
+    virtual bool beforeGraphFinalize() override { return true; }
+
+    virtual bool afterGraphFinalize() override { return true; }
+
+    virtual bool beforeRegisterOpPackages() override { return true; }
+
+    virtual bool afterRegisterOpPackages() override { return true; }
+
+    virtual bool beforeExecute(
+        const char* graphName,
+        QnnGraph_Config_t*** customConfigs,
+        uint32_t* configCount
+    ) override {
+        ignore(graphName);
+        ignore(customConfigs);
+        ignore(configCount);
+        return true;
+    }
+
+    virtual bool afterExecute() override { return true; }
+
+    virtual bool beforeContextFree() override { return true; }
+
+    virtual bool afterContextFree() override { return true; }
+
+    virtual bool beforeBackendTerminate() override { return true; }
+
+    virtual bool afterBackendTerminate() override { return true; }
+
+    virtual bool beforeCreateFromBinary(QnnContext_Config_t*** customConfigs, uint32_t* configCount)
+        override {
+        ignore(customConfigs);
+        ignore(configCount);
+        return true;
+    }
+
+    virtual bool afterCreateFromBinary() override { return true; }
+
+#if QUALLA_QNN_API_VERSION >= 21700
+    virtual bool beforeCreateContextsFromBinaryList(
+        std::map>*
+            contextKeyToCustomConfigsMap,
+        QnnContext_Config_t*** commonCustomConfigs,
+        uint32_t* commonConfigCount
+    ) override {
+        ignore(contextKeyToCustomConfigsMap);
+        ignore(commonCustomConfigs);
+        ignore(commonConfigCount);
+        return true;
+    }
+
+    virtual bool afterCreateContextsFromBinaryList() override { return true; }
+#endif
+
+    virtual bool beforeCreateDevice(QnnDevice_Config_t*** deviceConfigs, uint32_t* configCount)
+        override {
+        ignore(deviceConfigs);
+        ignore(configCount);
+        return true;
+    }
+
+    virtual bool afterCreateDevice() override { return true; }
+
+    virtual bool beforeFreeDevice() override { return true; }
+
+    virtual bool afterFreeDevice() override { return true; }
+
+  private:
+    // Utility function to ignore compiler warnings when a variable
+    // is unused. Recommended by Herb Sutter in Sutter's Mill
+    // instead of (void)variable.
+    template <typename T>
+    void ignore(const T&) {}
+};
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/QnnApi.cpp b/Genie/Genie/src/qualla/engines/qnn-api/QnnApi.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e7abc2b8db5d23ce164b037e9be2a1cfd0597415
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/QnnApi.cpp
@@ -0,0 +1,2681 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+// +//============================================================================== + +#include +#if defined(__GNUC__) && !defined(__clang__) +#include +#endif +#ifndef _WIN32 + #include +#endif + +#include "dlwrap.hpp" +#include "QnnApi.hpp" + +#ifdef SPILLFILL + #include "QnnHtpContext.h" + #include "QnnHtpCommon.h" +#endif + +QnnApi::~QnnApi() { + // QNN_DEBUG("Destroying Performance"); + // if (true != destroyPerformance()) { + // QNN_DEBUG("Could not destroy Performance"); + // } + + QNN_DEBUG("Freeing Graphs"); + if (true != freeGraphs()) { + QNN_DEBUG("Could not free Graphs"); + } + + // Free context if not already done + if (m_isContextCreated) { + QNN_DEBUG("Freeing Context"); + if (true != freeContext()) { + QNN_DEBUG("Could not free context"); + } + } + + if (m_profileBackendHandle) { + QNN_DEBUG("Freeing profile handle"); + if (QNN_PROFILE_NO_ERROR != m_qnnInterface.profileFree(m_profileBackendHandle)) + QNN_ERROR("Could not free QNN HTP backend profile handle."); + } + + QNN_DEBUG("Freeing Device"); + if (getDeviceStatus()) { + if (true != freeDevice()) { + QNN_ERROR("Device Free failure"); + } + } + + QNN_DEBUG("Terminating Logging"); + if (m_isLogInitialized) { + terminateLog(); + } + m_isLogInitialized = false; + + // Terminate backend + if (m_isBackendInitialized) { + QNN_DEBUG("Terminating Backend"); + if (true != terminateBackend()) { + QNN_DEBUG("Could not terminate backend"); + } + } + + // Skip dlclose for HTP because it runs its own cleanup routines later. + if (m_backendLibraryHandle && (m_backendId != QNN_BACKEND_ID_HTP)) { + QNN_DEBUG("Closing Backend Lib Handle"); + dlclose(m_backendLibraryHandle); + } + + if (m_libModelHandle) { + QNN_DEBUG("Closing Model Lib Handle"); + dlclose(m_libModelHandle); + } + + if (!m_contextBinBuffersToBeCleared.empty()) { + for (auto& [buffer, bufferSize] : m_contextBinBuffersToBeCleared) { + QNN_DEBUG("Free context bin buffer %p of size %lu", buffer, bufferSize); + if (m_mmapContextBins) { +#ifndef _WIN32 + if (munmap(buffer, bufferSize)) { + QNN_ERROR("Failed to unmap buffer for context"); + } +#endif + } else { + delete[] buffer; + } + } + m_contextBinBuffersToBeCleared.clear(); + } +} + +bool QnnApi::getContextConfigs( + QnnContext_Config_t*** configs, + uint32_t& contextConfigCount, + Qnn_Priority_t contextPriority, + bool graphSwitching, + const std::vector& execSelectGraphs, + bool loadSelectGraphs +) { + std::vector contextConfigPtrsVec; + + if (contextPriority != QNN_PRIORITY_DEFAULT) { + contextConfigPtrsVec.push_back((QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t))); + contextConfigPtrsVec.back()->option = + QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_OPTION_PRIORITY; + contextConfigPtrsVec.back()->priority = contextPriority; + } + + const char** graphNames = nullptr; + + if (loadSelectGraphs && !execSelectGraphs.empty()) { + graphNames = (const char**)malloc(sizeof(const char*) * (execSelectGraphs.size() + 1)); + for (size_t i = 0; i < execSelectGraphs.size(); ++i) { + graphNames[i] = execSelectGraphs[i].c_str(); + } + + graphNames[execSelectGraphs.size()] = nullptr; // NULL termination + contextConfigPtrsVec.push_back((QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t))); + contextConfigPtrsVec.back()->option = + QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_ENABLE_GRAPHS; + contextConfigPtrsVec.back()->enableGraphs = graphNames; + } + + if (graphSwitching) { + contextConfigPtrsVec.push_back((QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t))); + contextConfigPtrsVec.back()->option = + 
QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_MEMORY_LIMIT_HINT; + contextConfigPtrsVec.back()->memoryLimitHint = 1024; + + contextConfigPtrsVec.push_back((QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t))); + contextConfigPtrsVec.back()->option = + QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_PERSISTENT_BINARY; + contextConfigPtrsVec.back()->isPersistentBinary = 1; + } + + contextConfigCount = contextConfigPtrsVec.size(); + + QnnContext_Config_t** contextConfigPtrs = + (QnnContext_Config_t**)malloc(contextConfigCount * sizeof(QnnContext_Config_t*)); + + if (nullptr == contextConfigPtrs) { + QNN_ERROR("Could not allocate memory for allContextConfigs"); + return false; + } + + for (size_t i = 0; i < contextConfigCount; i++) { + contextConfigPtrs[i] = contextConfigPtrsVec[i]; + } + + *configs = contextConfigPtrs; + + return true; +} + +bool QnnApi::mergeAllContextConfigs( + QnnContext_Config_t*** allCustomContextConfigs, + QnnContext_Config_t** customConfigs, + QnnContext_Config_t** contextConfigs, + uint32_t customConfigCount, + uint32_t contextConfigCount +) { + QnnContext_Config_t** allContextConfigs{nullptr}; + if (contextConfigCount + customConfigCount > 0) { + allContextConfigs = (QnnContext_Config_t**)calloc( + (contextConfigCount + customConfigCount + 1), sizeof(QnnContext_Config_t*) + ); + if (nullptr == allContextConfigs) { + QNN_ERROR("Could not allocate memory for allContextConfigs"); + return false; + } + for (size_t cnt = 0; cnt < contextConfigCount; cnt++) { + allContextConfigs[cnt] = contextConfigs[cnt]; + } + for (size_t cnt = 0; cnt < customConfigCount; cnt++) { + allContextConfigs[cnt + contextConfigCount] = customConfigs[cnt]; + } + } + *allCustomContextConfigs = allContextConfigs; + + return true; +} + +bool QnnApi::freeContextConfigs(QnnContext_Config_t** contextConfigs, uint32_t contextConfigCount) { + if (contextConfigs) { + for (size_t i = 0; i < contextConfigCount; i++) { + if (contextConfigs[i]->option == QNN_CONTEXT_CONFIG_ENABLE_GRAPHS) { + free((const char**)contextConfigs[i]->enableGraphs); + } + free(contextConfigs[i]); + } + free(contextConfigs); + } + + return true; +} + +bool QnnApi::setGraphConfigsBeforeExecute( + Qnn_GraphHandle_t graphHandle, + QnnGraph_Config_t** graphConfigs, + uint32_t configCount +) { + if (!graphConfigs || configCount == 0u) { + QNN_ERROR("No graph configs to set"); + return false; + } + + std::vector graphConfigsPointers(configCount + 1, nullptr); + for (size_t idx = 0u; idx < configCount; idx++) { + graphConfigsPointers[idx] = graphConfigs[idx]; + } + if (QNN_SUCCESS != m_qnnInterface.graphSetConfig(graphHandle, graphConfigsPointers.data())) { + QNN_ERROR("Failed to set graph configs."); + return false; + } + + return true; +} + +bool QnnApi::getQnnInterface(std::string backendPath) { + + QnnInterfaceGetProvidersFn_t getInterfaceProviders{nullptr}; + + m_backendLibraryHandle = dlopen(backendPath.c_str(), RTLD_NOW); + if (nullptr == m_backendLibraryHandle) { + QNN_ERROR("Unable to load backend. 
dlerror(): %s", dlerror()); + return false; + } + + // Get QNN Interface + getInterfaceProviders = (QnnInterfaceGetProvidersFn_t + )dlsym(m_backendLibraryHandle, "QnnInterface_getProviders"); + if (nullptr == getInterfaceProviders) { + return false; + } + + uint32_t numProviders{0}; + QnnInterface_t** interfaceProviders{nullptr}; + if (QNN_SUCCESS != + getInterfaceProviders((const QnnInterface_t***)&interfaceProviders, &numProviders)) { + QNN_ERROR("Failed to get interface providers."); + return false; + } + + if (nullptr == interfaceProviders) { + QNN_ERROR("Failed to get interface providers: null interface providers received."); + return false; + } + if (0u == numProviders) { + QNN_ERROR("Failed to get interface providers: 0 interface providers."); + return false; + } + + bool foundValidInterface{false}; + for (size_t pIdx = 0; pIdx < numProviders; pIdx++) { + const Qnn_ApiVersion_t& apiVersion = interfaceProviders[pIdx]->apiVersion; + if ((QNN_API_VERSION_MAJOR == apiVersion.coreApiVersion.major) && + (QNN_API_VERSION_MINOR <= apiVersion.coreApiVersion.minor)) { + foundValidInterface = true; + m_qnnInterface = interfaceProviders[pIdx]->QNN_INTERFACE_VER_NAME; + m_backendId = interfaceProviders[pIdx]->backendId; + break; + } + } + + if (!foundValidInterface) { + QNN_ERROR("Unable to find a valid interface."); + m_backendLibraryHandle = nullptr; + return false; + } + + return true; +} + +bool QnnApi::getQnnSystemInterface(std::string systemLibraryPath) { + QnnSystemInterfaceGetProvidersFn_t getSystemInterfaceProviders{nullptr}; + + void* systemLibraryHandle = dlopen(systemLibraryPath.c_str(), RTLD_NOW); + if (nullptr == systemLibraryHandle) { + QNN_ERROR("Unable to load system library. dlerror(): %s", dlerror()); + return false; + } + + // Get QNN System Interface + getSystemInterfaceProviders = (QnnSystemInterfaceGetProvidersFn_t + )dlsym(systemLibraryHandle, "QnnSystemInterface_getProviders"); + if (nullptr == getSystemInterfaceProviders) { + return false; + } + + uint32_t numProviders{0}; + QnnSystemInterface_t** systemInterfaceProviders{nullptr}; + if (QNN_SUCCESS != + getSystemInterfaceProviders( + (const QnnSystemInterface_t***)&systemInterfaceProviders, &numProviders + )) { + QNN_ERROR("Failed to get system interface providers."); + return false; + } + if (nullptr == systemInterfaceProviders) { + QNN_ERROR( + "Failed to get system interface providers: null system interface providers received." + ); + return false; + } + if (0 == numProviders) { + QNN_ERROR("Failed to get system interface providers: 0 system interface providers."); + return false; + } + + bool foundValidSystemInterface{false}; + for (size_t pIdx = 0; pIdx < numProviders; pIdx++) { + const Qnn_Version_t& systemApiVersion = systemInterfaceProviders[pIdx]->systemApiVersion; + if (QNN_SYSTEM_API_VERSION_MAJOR == systemApiVersion.major && + QNN_SYSTEM_API_VERSION_MINOR <= systemApiVersion.minor) { + foundValidSystemInterface = true; + m_qnnSystemInterface = systemInterfaceProviders[pIdx]->QNN_SYSTEM_INTERFACE_VER_NAME; + break; + } + } + if (!foundValidSystemInterface) { + QNN_ERROR("Unable to find a valid system interface."); + return false; + } + + return true; +} + +bool QnnApi::loadModel(std::string model_path) { + const char* dlsym_error; + + dlerror(); + m_libModelHandle = dlopen(model_path.c_str(), RTLD_NOW); + if (nullptr == m_libModelHandle) { + QNN_ERROR("Unable to load model. dlerror(): %s", dlerror()); + return false; + } + + // Currently model Prefix is fixed. 
If model was prepared with + // custom prefix, we need to change this. + std::string modelPrefix = "QnnModel"; + + std::string modelPrepareFunc = modelPrefix + "_composeGraphs"; + m_composeGraphsFnHandle = + (ComposeGraphsFnHandleType_t)dlsym(m_libModelHandle, modelPrepareFunc.c_str()); + dlsym_error = dlerror(); + if (dlsym_error || nullptr == m_composeGraphsFnHandle) { + m_composeGraphsFnHandle = nullptr; + std::string genaiModelPrepareFunc = "QnnModel_GenAI_composeGraphs"; + m_genaiComposeGraphsFnHandle = (GenAIComposeGraphsFnHandleType_t + )dlsym(m_libModelHandle, genaiModelPrepareFunc.c_str()); + dlsym_error = dlerror(); + if (dlsym_error || nullptr == m_genaiComposeGraphsFnHandle) { + QNN_ERROR("Did not find QnnModel_composeGraph function: %s", dlsym_error); + return false; + } + } + + std::string modelFreeFunc = modelPrefix + "_freeGraphsInfo"; + m_freeGraphInfoFnHandle = + (FreeGraphInfoFnHandleType_t)dlsym(m_libModelHandle, modelFreeFunc.c_str()); + dlsym_error = dlerror(); + if (dlsym_error || nullptr == m_freeGraphInfoFnHandle) { + QNN_ERROR("Did not find QnnModel_freeGraphsInfo function: %s", dlsym_error); + return false; + } + + return true; +} + +void QnnApi::qnnLogCallback( + const char* fmt, + QnnLog_Level_t level, + uint64_t timestamp, + va_list args +) { + char buffer[1024] = ""; + const char* levelStr = ""; + switch (level) { + case QNN_LOG_LEVEL_ERROR: + levelStr = " ERROR "; + break; + case QNN_LOG_LEVEL_WARN: + levelStr = "WARNING"; + break; + case QNN_LOG_LEVEL_INFO: + levelStr = " INFO "; + break; + case QNN_LOG_LEVEL_DEBUG: + levelStr = " DEBUG "; + break; + case QNN_LOG_LEVEL_VERBOSE: + levelStr = "VERBOSE"; + break; + case QNN_LOG_LEVEL_MAX: + levelStr = "UNKNOWN"; + break; + } + + int pos = snprintf( + buffer, sizeof(buffer), "QNN: [%s] time=%lu:", levelStr, (unsigned long)timestamp + ); + vsnprintf(buffer + pos, sizeof(buffer) - pos, fmt, args); + printf("%s", buffer); +} + +bool QnnApi::initializeLogging(const QnnLog_Level_t& logLevel, bool debug_qnn) { + // initialize logging in the backend + if (nullptr != m_qnnInterface.logCreate) { + QnnLog_Callback_t logCallback = nullptr; + if (debug_qnn) logCallback = QnnApi::qnnLogCallback; + + QNN_DEBUG( + "Initializing logging in the backend. Callback: [%p], Log Level: [%d]", + logCallback, + logLevel + ); + if (QNN_SUCCESS != m_qnnInterface.logCreate(logCallback, logLevel, &m_logHandle)) { + QNN_WARN("Unable to initialize logging in the backend."); + } + m_isLogInitialized = true; + } + else { + QNN_WARN("Logging not available in the backend."); + return true; + } + + return true; +} + +void QnnApi::terminateLog() { + // Terminate logging in the backend + if (nullptr != m_qnnInterface.logFree && nullptr != m_logHandle) { + if (QNN_SUCCESS != m_qnnInterface.logFree(m_logHandle)) { + QNN_WARN("Unable to terminate logging in the backend."); + } + } +} + +bool QnnApi::initializeBackendExtensions( + BackendExtensionsConfigs backendExtensionsConfig, + PerfProfile parsedPerfProfile, + bool debug_qnn +) { + + std::unique_ptr backendExtensions(new BackendExtensions( + backendExtensionsConfig, m_backendLibraryHandle, parsedPerfProfile, nullptr, debug_qnn + )); + if (nullptr == backendExtensions) { + QNN_ERROR("Unable to create backend extensions object."); + return false; + } + if (!backendExtensions->initialize()) { + QNN_ERROR("Unable to initialize backend extensions."); + return false; + } + m_backendExtensions = std::move(backendExtensions); + + return true; +} + +// Initialize a QnnBackend. 
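initializeBackend() below, like createContext() further down, has to combine the configs returned by the extension's before-hook with QnnApi's own into a single null-terminated array before calling into QNN. A standalone sketch of that merge pattern; `mergeConfigLists` is illustrative, not an API from this patch:

```cpp
#include <cstdint>
#include <cstdlib>

// Merge two C-style config lists into one calloc'd array whose final slot is
// nullptr, the layout the QnnBackend_Config_t**/QnnContext_Config_t** APIs expect.
template <typename Cfg>
Cfg** mergeConfigLists(Cfg** a, uint32_t aCount, Cfg** b, uint32_t bCount) {
    if (aCount + bCount == 0) return nullptr;
    Cfg** merged = (Cfg**)calloc(aCount + bCount + 1, sizeof(Cfg*));  // +1 keeps the null terminator
    if (merged == nullptr) return nullptr;
    for (uint32_t i = 0; i < aCount; ++i) merged[i] = a[i];
    for (uint32_t i = 0; i < bCount; ++i) merged[aCount + i] = b[i];
    return merged;  // caller releases the array with free(); elements are not owned
}
```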
+bool QnnApi::initializeBackend() { + if (nullptr == m_qnnInterface.backendCreate) { + QNN_ERROR("BackendCreate API is not supported for this backend"); + return false; + } + + QnnBackend_Config_t** customConfigs{nullptr}; + uint32_t customConfigCount{0}; + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeBackendInitialize( + &customConfigs, &customConfigCount + )) { + QNN_ERROR("Extensions Failure in beforeBackendInitialize()"); + return false; + } + } + QnnBackend_Config_t** allBackendConfigs{nullptr}; + if ((m_backendConfigCount + customConfigCount) > 0) { + allBackendConfigs = (QnnBackend_Config_t**)calloc( + (m_backendConfigCount + customConfigCount + 1), sizeof(QnnBackend_Config_t*) + ); + if (nullptr == allBackendConfigs) { + QNN_ERROR("Could not allocate memory for allBackendConfigs"); + return false; + } + for (size_t cnt = 0; cnt < m_backendConfigCount; cnt++) { + allBackendConfigs[cnt] = m_backendConfigs[cnt]; + } + for (size_t cnt = 0; cnt < customConfigCount; cnt++) { + allBackendConfigs[cnt + m_backendConfigCount] = customConfigs[cnt]; + } + } + + auto returnStatus = m_qnnInterface.backendCreate( + m_logHandle, (const QnnBackend_Config_t**)allBackendConfigs, &m_backendHandle + ); + if (QNN_SUCCESS != returnStatus) { + QNN_ERROR( + "Could not initialize backend due to error = %llu", (unsigned long long)returnStatus + ); + if (allBackendConfigs) { + free(allBackendConfigs); + } + return false; + } + QNN_DEBUG("Initialize Backend Returned Status = %llu", (unsigned long long)returnStatus); + + m_isBackendInitialized = true; + if (allBackendConfigs) { + free(allBackendConfigs); + } + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->afterBackendInitialize()) { + QNN_ERROR("Extensions Failure in afterBackendInitialize()"); + return false; + } + } + + return true; +} + +// Terminate the backend after done. 
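Every lifecycle method in this file follows the same shape: run the extension's before-hook, perform the QNN operation, then run the after-hook, failing fast if any step refuses. Reduced to a hedged sketch (`runStep` and the `std::function` parameters are illustrative, not part of the patch):

```cpp
#include <functional>

// The before/call/after bracketing used by initializeBackend, terminateBackend,
// createDevice, createContext, and the other lifecycle methods in this file.
bool runStep(const std::function<bool()>& before,
             const std::function<bool()>& qnnCall,
             const std::function<bool()>& after) {
    if (before && !before()) return false;  // extension vetoed the step
    if (!qnnCall()) return false;           // the actual QNN API call
    if (after && !after()) return false;    // extension post-processing failed
    return true;
}
```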
+bool QnnApi::terminateBackend() {
+
+ if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
+ if (!m_backendExtensions->interface()->beforeBackendTerminate()) {
+ QNN_ERROR("Extensions Failure in beforeBackendTerminate()");
+ return false;
+ }
+ }
+ // Terminate backend
+ if (m_isBackendInitialized && nullptr != m_qnnInterface.backendFree) {
+ QNN_DEBUG("Freeing backend");
+ if (QNN_BACKEND_NO_ERROR != m_qnnInterface.backendFree(m_backendHandle)) {
+ QNN_ERROR("Could not free backend");
+ }
+ }
+ m_isBackendInitialized = false;
+
+ if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
+ if (!m_backendExtensions->interface()->afterBackendTerminate()) {
+ QNN_ERROR("Extensions Failure in afterBackendTerminate()");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool QnnApi::createDevice() {
+ QnnDevice_Config_t** deviceConfigs{nullptr};
+ uint32_t configCount{0};
+
+ if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
+ if (!m_backendExtensions->interface()->beforeCreateDevice(&deviceConfigs, &configCount)) {
+ QNN_ERROR("Extensions Failure in beforeCreateDevice()");
+ return false;
+ }
+ }
+ std::vector<const QnnDevice_Config_t*> deviceConfigPointers(configCount + 1, nullptr);
+ for (size_t idx = 0u; idx < configCount; idx++) {
+ deviceConfigPointers[idx] = deviceConfigs[idx];
+ }
+ if (nullptr != m_qnnInterface.deviceCreate) {
+ auto qnnStatus = m_qnnInterface.deviceCreate(
+ m_logHandle, deviceConfigPointers.data(), &m_deviceHandle
+ );
+ if (QNN_SUCCESS != qnnStatus) {
+ if (QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE == qnnStatus) {
+ QNN_WARN("Device feature unsupported");
+ } else {
+ QNN_ERROR("Failed to create device: %lu", (unsigned long)qnnStatus);
+ return false;
+ }
+ }
+ }
+ if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
+ if (!m_backendExtensions->interface()->afterCreateDevice()) {
+ QNN_ERROR("Extensions Failure in afterCreateDevice()");
+ return false;
+ }
+ }
+ return true;
+}
+
+bool QnnApi::freeDevice() {
+ if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
+ if (!m_backendExtensions->interface()->beforeFreeDevice()) {
+ QNN_ERROR("Extensions Failure in beforeFreeDevice()");
+ return false;
+ }
+ }
+ if (nullptr != m_qnnInterface.deviceFree) {
+ auto qnnStatus = m_qnnInterface.deviceFree(m_deviceHandle);
+ if (QNN_SUCCESS != qnnStatus) {
+ if (QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE == qnnStatus) {
+ QNN_WARN("Device feature unsupported");
+ } else {
+ QNN_ERROR("Failed to free device: %lu", (unsigned long)qnnStatus);
+ return false;
+ }
+ }
+ }
+ if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
+ if (!m_backendExtensions->interface()->afterFreeDevice()) {
+ QNN_ERROR("Extensions Failure in afterFreeDevice()");
+ return false;
+ }
+ }
+ return true;
+}
+
+// Create a Context in a backend.
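+//
+// Illustrative call, assuming ContextConfigs carries at least the priority
+// field consumed below (variable names here are hypothetical):
+//
+//   ContextConfigs ctxCfg;
+//   ctxCfg.priority = QNN_PRIORITY_DEFAULT;
+//   if (!api.createContext(ctxCfg)) { /* handle failure */ }
+//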
+bool QnnApi::createContext(ContextConfigs contextConfig) { + QnnContext_Config_t** customConfigs{nullptr}; + uint32_t customConfigCount{0}; + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeContextCreate( + &customConfigs, &customConfigCount + )) { + QNN_ERROR("Extensions Failure in beforeContextCreate()"); + return false; + } + } + + QnnContext_Config_t** contextConfigs = nullptr; + uint32_t contextConfigCount = 0; + if (true != getContextConfigs(&contextConfigs, contextConfigCount, contextConfig.priority)) { + QNN_ERROR("Couldn't populate context configs"); + return false; + } + + QnnContext_Config_t** allContextConfigs{nullptr}; + if (true != mergeAllContextConfigs( + &allContextConfigs, + customConfigs, + contextConfigs, + customConfigCount, + contextConfigCount + )) { + QNN_ERROR("Error merging custom and context configs"); + return false; + } + + Qnn_ContextHandle_t contextHandle{nullptr}; + if (QNN_CONTEXT_NO_ERROR != m_qnnInterface.contextCreate( + m_backendHandle, + nullptr, + (const QnnContext_Config_t**)allContextConfigs, + &contextHandle + )) { + QNN_ERROR("Could not create context"); + if (allContextConfigs) { + free(allContextConfigs); + } + + return false; + } + + m_contextVec.push_back(contextHandle); + m_isContextCreated = true; + if (allContextConfigs) { + free(allContextConfigs); + } + + if (true != freeContextConfigs(contextConfigs, contextConfigCount)) { + QNN_ERROR("Couldn't free context configs"); + return false; + } + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->afterContextCreate()) { + QNN_ERROR("Extensions Failure in afterContextCreate()"); + return false; + } + } + + return true; +} + +// Free context after done. +bool QnnApi::freeContext() { + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeContextFree()) { + QNN_ERROR("Extensions Failure in beforeContextFree()"); + return false; + } + } + for (const auto& context : m_contextVec) { + if (QNN_CONTEXT_NO_ERROR != m_qnnInterface.contextFree(context, nullptr)) { + QNN_ERROR("Could not free context"); + return false; + } + } + m_isContextCreated = false; + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->afterContextFree()) { + QNN_ERROR("Extensions Failure in afterContextFree()"); + return false; + } + } + + return true; +} + +// Calls composeGraph function in QNN's model.so. +// composeGraphs is supposed to populate graph related +// information in graphsInfo and graphsCount. +// m_debug is the option supplied to composeGraphs to +// say that all intermediate tensors including output tensors +// are expected to be read by the app. 
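+//
+// For reference, the model library entry point resolved earlier is expected to
+// match ComposeGraphsFnHandleType_t (see QnnApi.hpp), roughly:
+//
+//   ModelError_t QnnModel_composeGraphs(
+//       Qnn_BackendHandle_t backend, QNN_INTERFACE_VER_TYPE iface,
+//       Qnn_ContextHandle_t context, const GraphConfigInfo_t** graphConfigs,
+//       uint32_t numGraphConfigs, GraphInfo_t*** graphsInfo, uint32_t* graphsCount,
+//       bool debug, QnnLog_Callback_t logCallback, QnnLog_Level_t logLevel);
+//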
+bool QnnApi::composeGraphs(std::vector graphConfigs) { + GraphConfigInfo_t** customConfigs{nullptr}; + uint32_t customConfigGraphsCount{0}; + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeComposeGraphs( + &customConfigs, &customConfigGraphsCount + )) { + QNN_ERROR("Extensions Failure in beforeComposeGraphs()"); + return false; + } + } + + std::map> graphConfigsPointers; + if (!graphConfigs.empty()) { + for (auto const& inputGraphConfig : graphConfigs) { + // Only reset the memory for this graph, if it has not previously been populated with + // something + if (graphConfigsPointers.find(inputGraphConfig.graphName) == + graphConfigsPointers.end()) { + graphConfigsPointers[inputGraphConfig.graphName] = + std::vector(); + graphConfigsPointers[inputGraphConfig.graphName].reserve(s_graphConfigsReserveCount + ); + } + if (inputGraphConfig.priorityPresent) { + QnnGraph_Config_t* newGraphConfig = + (QnnGraph_Config_t*)malloc(sizeof(QnnGraph_Config_t)); + newGraphConfig->option = QNN_GRAPH_CONFIG_OPTION_PRIORITY; + newGraphConfig->priority = inputGraphConfig.priority; + graphConfigsPointers[inputGraphConfig.graphName].push_back(newGraphConfig); + } + } + } + + if (customConfigs != nullptr && customConfigGraphsCount > 0) { + for (size_t gIdx = 0; gIdx < customConfigGraphsCount; gIdx++) { + auto configPtr = customConfigs[gIdx]->graphConfigs; + if (*configPtr && + (!customConfigs[gIdx]->graphName || strlen(customConfigs[gIdx]->graphName) == 0)) { + QNN_ERROR("Graph configs specified without a graph name in the backend extensions." + ); + return false; + } + if (customConfigs[gIdx]->graphName && strlen(customConfigs[gIdx]->graphName) > 0 && + *configPtr) { + if (graphConfigsPointers.find(customConfigs[gIdx]->graphName) == + graphConfigsPointers.end()) { + graphConfigsPointers[customConfigs[gIdx]->graphName] = + std::vector(); + graphConfigsPointers[customConfigs[gIdx]->graphName].reserve( + s_graphConfigsReserveCount + ); + } + while (*configPtr) { + graphConfigsPointers[customConfigs[gIdx]->graphName].push_back( + (QnnGraph_Config_t*)*configPtr + ); + configPtr++; + } + } + } + } + + GraphConfigInfo_t** graphConfigsInfo{nullptr}; + graphConfigsInfo = + (GraphConfigInfo_t**)calloc(graphConfigsPointers.size(), sizeof(GraphConfigInfo_t*)); + size_t graphIdx{0}; + for (auto const& graphConfig : graphConfigsPointers) { + if (graphConfigsInfo && graphConfig.second.size() > 0) { + graphConfigsInfo[graphIdx] = (GraphConfigInfo_t*)malloc(sizeof(GraphConfigInfo_t)); + graphConfigsInfo[graphIdx]->graphName = (char*)graphConfig.first.c_str(); + graphConfigsInfo[graphIdx]->graphConfigs = (const QnnGraph_Config_t**)calloc( + graphConfig.second.size() + 1, sizeof(QnnGraph_Config_t*) + ); + for (size_t cnt = 0; cnt < graphConfig.second.size(); cnt++) { + graphConfigsInfo[graphIdx]->graphConfigs[cnt] = graphConfig.second[cnt]; + } + } + graphIdx++; + } + + int status = m_composeGraphsFnHandle( + m_backendHandle, + m_qnnInterface, + m_contextVec[0], + (const GraphConfigInfo_t**)graphConfigsInfo, + graphConfigsPointers.size(), + &m_graphsInfo, + &m_graphsCount, + m_DebugModeRequested, + nullptr, + QnnLog_Level_t::QNN_LOG_LEVEL_VERBOSE + ); + + if (graphConfigsInfo) { + for (size_t gIdx = 0; gIdx < graphConfigsPointers.size(); gIdx++) { + if (graphConfigsInfo[gIdx]) { + if (graphConfigsInfo[gIdx]->graphConfigs) { + free(graphConfigsInfo[gIdx]->graphConfigs); + graphConfigsInfo[gIdx]->graphConfigs = nullptr; + graphConfigsInfo[gIdx]->graphName = 
nullptr; + } + free(graphConfigsInfo[gIdx]); + graphConfigsInfo[gIdx] = nullptr; + } + } + free(graphConfigsInfo); + } + + for (auto const& graphConfig : graphConfigsPointers) { + for (size_t cnt = 0; cnt < graphConfig.second.size(); cnt++) { + if (graphConfig.second[cnt]) { + free(graphConfig.second[cnt]); + } + } + // graphConfig.second.clear(); + } + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->afterComposeGraphs()) { + QNN_ERROR("Extensions Failure in afterComposeGraphs()"); + return false; + } + } + + if (0 != status) { + QNN_ERROR("Failed in composeGraphs()"); + return false; + } + + // For now, we only handle 1 graph for this framework. + if (m_graphsCount != 1) { + QNN_ERROR("Only one graph is supported by framework"); + return false; + } + + return true; +} + +bool QnnApi::composeGraphs( + std::vector graphConfigs, + uint32_t* inputDim, + uint32_t inputRank, + uint32_t* outputDim, + uint32_t outputRank, + uint32_t* kvDim, + uint32_t kvRank, + Qnn_Param_t* params, + uint32_t numParams +) { + ModelError status = m_genaiComposeGraphsFnHandle( + m_backendHandle, + m_qnnInterface, + m_contextVec[0], + nullptr, + 0, + inputDim, + inputRank, + outputDim, + outputRank, + kvDim, + kvRank, + params, + numParams, + &m_graphsInfo, + &m_graphsCount, + m_DebugModeRequested, + nullptr, + QnnLog_Level_t::QNN_LOG_LEVEL_VERBOSE + ); + + if (status == MODEL_NO_ERROR) { + return true; + } + + return false; +} + +bool QnnApi::finalizeGraphs() { + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeGraphFinalize()) { + QNN_ERROR("Extensions Failure in beforeGraphFinalize()"); + return false; + } + } + + for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) { + if (QNN_GRAPH_NO_ERROR != + m_qnnInterface.graphFinalize(m_graphsInfo[graphIdx]->graph, nullptr, nullptr)) { + return false; + } + + if (m_profileBackendHandle) { + extractBackendProfilingInfo(m_profileBackendHandle); + } + } + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->afterGraphFinalize()) { + QNN_ERROR("Extensions Failure in afterGraphFinalize()"); + return false; + } + } + + return true; +} + +bool QnnApi::freeGraphs() { + freeGraphsInfo(&m_graphsInfo, m_graphsCount); + if (m_graphsInfo) { + free(m_graphsInfo); + } + m_graphsInfo = nullptr; + m_graphsCount = 0; + return true; +} + +bool QnnApi::mapAndGetContextBinaryInfo( + const bool use_mmap, + std::shared_ptr& buffer, + const std::string binaryPath, + const uint64_t bufferSize, + const size_t contextIdx, + const bool graphSwitching, + QnnSystemContext_Handle_t sysCtxHandle, + const QnnSystemContext_BinaryInfo_t** binaryInfo +) { + if (use_mmap) { +#ifndef _WIN32 + void* mappedBuffer = nullptr; + if (true != mmapBinaryFile(binaryPath, &mappedBuffer, bufferSize)) { + QNN_ERROR("Failed to read binary data for context index = %zu", contextIdx); + return false; + } + buffer = std::shared_ptr( + static_cast(mappedBuffer), + [graphSwitching, bufferSize](uint8_t* ptr) { + if (!graphSwitching) { + munmap(ptr, bufferSize); + } + } + ); +#else + return false; +#endif + } else { + buffer = std::shared_ptr(new uint8_t[bufferSize], [graphSwitching](uint8_t* ptr) { + if (!graphSwitching) { + delete[] ptr; + } + }); + + if (!buffer) { + QNN_ERROR("Failed to allocate memory for context index = %zu", contextIdx); + return false; + } + if (true != readBinaryFromFile(binaryPath, 
buffer.get(), bufferSize)) {
+ QNN_ERROR("Failed to read binary data for context index = %zu", contextIdx);
+ return false;
+ }
+ }
+
+ if (graphSwitching) {
+ m_contextBinBuffersToBeCleared.push_back({buffer.get(), bufferSize});
+ }
+
+ Qnn_ContextBinarySize_t binaryInfoSize{0};
+ if (QNN_SUCCESS != m_qnnSystemInterface.systemContextGetBinaryInfo(
+ sysCtxHandle,
+ static_cast<void*>(buffer.get()),
+ bufferSize,
+ binaryInfo,
+ &binaryInfoSize
+ )) {
+ QNN_ERROR("Failed to get context binary info for context index = %zu", contextIdx);
+ return false;
+ }
+
+ return true;
+}
+
+bool QnnApi::parseIOTensorsAndAccumulate() {
+
+ for (int gIdx = 0; gIdx < m_graphsCount; gIdx++) {
+ auto graph_info = m_graphsInfo[gIdx];
+ for (bool io : {true, false}) {
+ auto n_tensors = (io) ? graph_info->numInputTensors : graph_info->numOutputTensors;
+ auto tensor_wrappers = (io) ? graph_info->inputTensors : graph_info->outputTensors;
+ for (size_t tensor_idx = 0; tensor_idx < n_tensors; tensor_idx++) {
+
+ TensorWrapper& tensor = tensor_wrappers[tensor_idx];
+ std::string tensor_name = QnnApi::getTensorName(tensor);
+
+ std::vector<size_t> tensor_dims;
+ if (!QnnApi::getTensorShape(tensor_dims, tensor)) {
+ QNN_ERROR("Couldn't get tensor shape : %s", tensor_name.c_str());
+ return false;
+ }
+
+ std::vector<QuantParam> quantParams;
+ if (!QnnApi::getTensorQuantParams(&tensor_wrappers[tensor_idx], quantParams)) {
+ quantParams.emplace_back(0, 0);
+ }
+
+ m_graphtoIOMap[gIdx][tensor_name] =
+ qualla::QnnUtils::Tensor(tensor_wrappers + tensor_idx, tensor_dims, quantParams);
+ }
+ }
+ }
+
+ // Maps tensor_name to context bitVector, each bit representing a context the tensor exists in
+ std::map<std::string, uint32_t> tensor_ctx_map;
+ // Maps a ContextHandle to a one-hot encoded bitVector (e.g. 1, 2, 4, ...)
+ std::map<Qnn_ContextHandle_t, uint32_t> ctx_to_hash;
+
+ // Iterate over all tensors in all GraphVariants to figure out allocations
+ for(int gIdx =0;gIdxsecond.empty()) ? m_contextAllocMap.erase(it) : ++it;
+ }
+
+#if QNN_IO_TENSOR_DEBUG
+ for (auto& [bitvector, nameMap] : m_contextAllocMap) {
+ for (auto& [tname, size] : nameMap)
+ QNN_DEBUG("Context: %d Tensor name: %s Tensor size: %zu", bitvector, tname.c_str(), size);
+ }
+#endif
+ return true;
+}
+
+bool QnnApi::registerTensorsWithBackend(uint32_t& graphIdx) {
+
+ std::map<std::string, std::tuple<uint32_t, size_t, size_t>> graph_allocs;
+ for (auto& [tname, tspec] : m_graphtoIOMap[graphIdx]) {
+
+ // past_key/value inputs are processed along with the corresponding outputs
+ if (tname.starts_with("past_") && tname.ends_with("_in")) continue;
+ auto& [alloc_idx, offset] = m_tensorAllocInfo.at(tname);
+
+ size_t kv_offset = 0;
+ size_t size = tspec.dims.getAlignedSize();
+ if (tname.starts_with("past_")) {
+ auto in_name = tname.substr(0, tname.rfind("_")).append("_in");
+ if (m_graphtoIOMap[graphIdx].count(in_name)) {
+ auto kv_in = m_graphtoIOMap[graphIdx][in_name];
+ kv_offset = kv_in.dims.getAlignedSize();
+ if (m_kvUpdateMethod == POINTER_SHIFT)
+ kv_offset += (tname.starts_with("past_key")) ?
m_ctxSize + : m_ctxSize * m_kvDim; + graph_allocs[in_name] = {alloc_idx, offset, kv_offset}; + } + } + graph_allocs[tname] = {alloc_idx, offset + kv_offset, size}; + } + auto& curContextHandle = m_contextVec[m_graphIdxToContextIdx[graphIdx]]; + if (!m_ioBufferMgr->mapFusedBufferOffset( + m_graphsInfo[graphIdx], curContextHandle, graph_allocs + )) { + QNN_ERROR("Error mapping tensor to allocation buffers"); + return false; + } + +#if QNN_IO_TENSOR_DEBUG +for(auto& [tname, data] : graph_allocs){ + QNN_DEBUG("Tensor Name: %s Alloc Idx: %d Tensor Offset: %zu Tensor Size: %zu",tname.c_str(),get<0>(data),get<1>(data),get<2>(data)); + } +#endif + + return true; + +} +bool QnnApi::createFromBinary( + std::vector cachedBinariesPathVec, + ContextConfigs contextConfig, + int64_t spill_fill_buffer_size, + uint64_t mmap_budget, + bool graphSwitching, + const std::vector& execSelectGraphs, + bool loadSelectGraphs +) { + + // Let backendExtensions populate configs + QnnContext_Config_t** customConfigs{nullptr}; + uint32_t customConfigCount{0}; + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeCreateFromBinary( + &customConfigs, &customConfigCount + )) { + QNN_ERROR("Extensions Failure in beforeCreateFromBinary()"); + return false; + } + } + + QnnContext_Config_t** contextConfigs = nullptr; + uint32_t contextConfigCount = 0; + if (true != getContextConfigs( + &contextConfigs, + contextConfigCount, + contextConfig.priority, + graphSwitching, + execSelectGraphs, + loadSelectGraphs + )) { + QNN_ERROR("Couldn't populate context configs"); + return false; + } + + // Merge BE specific and agnostic configs + QnnContext_Config_t** allContextConfigs{nullptr}; + if (true != mergeAllContextConfigs( + &allContextConfigs, + customConfigs, + contextConfigs, + customConfigCount, + contextConfigCount + )) { + QNN_ERROR("Error merging custom and context configs"); + return false; + } + + if (nullptr == m_qnnSystemInterface.systemContextCreate || + nullptr == m_qnnSystemInterface.systemContextGetBinaryInfo || + nullptr == m_qnnSystemInterface.systemContextFree) { + QNN_ERROR("QNN System function pointers are not populated."); + return false; + } + + graphCountPerContext = getGraphCountPerContext(); + +#ifdef SPILLFILL + Qnn_ContextHandle_t first_contextHandle{nullptr}; + QnnHtpContext_CustomConfig_t customConfigSF; + customConfigSF.option = QNN_HTP_CONTEXT_CONFIG_OPTION_REGISTER_MULTI_CONTEXTS; +#endif + + // Reading Binary Buffer and storing for later use during Deserialization + std::vector> bufferVec(cachedBinariesPathVec.size()); + // Stores sizes of all the Binary Buffers + std::vector allBuffSizes(cachedBinariesPathVec.size()); + // Stores graphs per Contexts + std::vector graphsPerContext(cachedBinariesPathVec.size()); + + for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) { + + auto _start = std::chrono::steady_clock::now(); // context Loading start + uint64_t bufferSize{0}; + std::shared_ptr& buffer{bufferVec[contextIdx]}; + uint32_t graphsCount; + + // read serialized binary into a byte buffer + bufferSize = getFileSize(cachedBinariesPathVec[contextIdx]); + allBuffSizes[contextIdx] = bufferSize; + if (0 == bufferSize) { + QNN_ERROR( + "Received path to an empty file for context index = %zu. 
Nothing to deserialize.", + contextIdx + ); + return false; + } + + // inspect binary info + QnnSystemContext_Handle_t sysCtxHandle{nullptr}; + if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) { + QNN_ERROR("Could not create system handle for context index = %zu", contextIdx); + return false; + } + + const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr}; + if (!mapAndGetContextBinaryInfo( + m_mmapContextBins, + buffer, + cachedBinariesPathVec[contextIdx], + bufferSize, + contextIdx, + graphSwitching, + sysCtxHandle, + &binaryInfo + )) { + QNN_ERROR("Failed to map context Binary for contextIdx: %zu", contextIdx); + return false; + } + + GraphInfo_t** graphsInfo{nullptr}; + if (!copyMetadataToGraphsInfo(binaryInfo, graphsInfo, graphsCount)) { + QNN_ERROR("Failed to copy metadata for graph index = %zu", contextIdx); + freeGraphsInfo(&graphsInfo, graphsCount); + if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount); + return false; + } + + if (graphCountPerContext == -1) { + graphCountPerContext = graphsCount; + m_graphsInfo = (GraphInfo_t**)calloc( + graphCountPerContext * cachedBinariesPathVec.size(), sizeof(GraphInfo_t*) + ); + } else if (graphCountPerContext != graphsCount) { + QNN_ERROR( + "Different len(graphs) found in different context files. Found %u vs %u", + graphsCount, + graphCountPerContext + ); + freeGraphsInfo(&graphsInfo, graphsCount); + if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount); + return false; + } + + auto _stop = std::chrono::steady_clock::now(); // context Loading stop + QNN_DEBUG( + "Loading contexts[%lu] took: %lld us", + contextIdx, + std::chrono::duration_cast(_stop - _start).count() + ); + graphsPerContext.push_back(graphsCount); + for (int gIdx = 0; gIdx < graphsCount; gIdx++) { + m_graphsInfo[m_graphsCount] = graphsInfo[gIdx]; + m_graphIdxToContextIdx[m_graphsCount] = contextIdx; + m_graphsCount++; + } + m_qnnSystemInterface.systemContextFree(sysCtxHandle); + sysCtxHandle = nullptr; + } + + // Iterate over all the tensors across the graphs Info and build info about the IO space it is requiring. 
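+ //
+ // The grouping idea in miniature (illustrative): each context gets a one-hot
+ // bit, and a tensor is keyed by the OR of the bits of every context it
+ // appears in, e.g.:
+ //
+ //   context 0 -> 0x1, context 1 -> 0x2
+ //   a tensor present in both is grouped under key 0x3 and backed by one
+ //   shared allocation rather than one buffer per context.
+ //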
+ if(false == parseIOTensorsAndAccumulate()){ + QNN_ERROR("Error in parsing the IO tensor info for all context binaries"); + return false; + } + + bool isIOBufferMgrInitialized = false; + + for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) { + + if (nullptr == m_qnnInterface.contextCreateFromBinary) { + QNN_ERROR( + "contextCreateFromBinaryFnHandle is nullptr for context index = %zu", contextIdx + ); + freeGraphsInfo(&m_graphsInfo, m_graphsCount); + return false; + } + + Qnn_ContextHandle_t contextHandle{nullptr}; + + uint32_t customConfigCountSF = 0; + +#ifdef SPILLFILL + if (spill_fill_buffer_size > 0) { + QnnHtpContext_GroupRegistration_t groupInfo{nullptr}; + if (contextIdx == 0) { + groupInfo.firstGroupHandle = 0x0; + } else { + groupInfo.firstGroupHandle = first_contextHandle; + } + groupInfo.maxSpillFillBuffer = spill_fill_buffer_size; + customConfigSF.groupRegistration = groupInfo; + + QnnContext_Config_t** cfgs{nullptr}; + customConfigCountSF = 1; + cfgs = (QnnContext_Config_t**)malloc( + customConfigCountSF * sizeof(QnnContext_Config_t*) + ); + cfgs[0] = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t)); + cfgs[0]->option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM; + cfgs[0]->customConfig = reinterpret_cast(&customConfigSF); + if (true != mergeAllContextConfigs( + &allContextConfigs, + cfgs, + allContextConfigs, + customConfigCountSF, + contextConfigCount + customConfigCount + )) { + QNN_ERROR("Error merging custom and context configs"); + return false; + } + } +#endif + + uint32_t customConfigCountIOMemEstimate = 0; +#if 1 // Adding IO_MEM_ESTIMATION + QnnHtpContext_CustomConfig_t ioMemEstimation; + ioMemEstimation.option = QNN_HTP_CONTEXT_CONFIG_OPTION_IO_MEM_ESTIMATION; + ioMemEstimation.ioMemEstimation = true; + + QnnContext_Config_t** cfgs{nullptr}; + + customConfigCountIOMemEstimate = 1; + + cfgs = (QnnContext_Config_t**)malloc( + customConfigCountIOMemEstimate * sizeof(QnnContext_Config_t*) + ); + cfgs[0] = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t)); + cfgs[0]->option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM; + cfgs[0]->customConfig = + reinterpret_cast(&ioMemEstimation); + if (true != mergeAllContextConfigs( + &allContextConfigs, + cfgs, + allContextConfigs, + customConfigCountIOMemEstimate, + contextConfigCount + customConfigCount + customConfigCountSF + )) { + QNN_ERROR("Error merging custom and context configs"); + return false; + } +#endif + + if (mmap_budget > 0) { + QnnHtpContext_CustomConfig_t customConfigReadBudget; + customConfigReadBudget.option = QNN_HTP_CONTEXT_CONFIG_OPTION_FILE_READ_MEMORY_BUDGET; + customConfigReadBudget.fileReadMemoryBudgetInMb = mmap_budget; + + QnnContext_Config_t** cfgs{nullptr}; + + uint32_t customConfigCountReadBudget = 1; + + cfgs = (QnnContext_Config_t**)malloc( + customConfigCountReadBudget * sizeof(QnnContext_Config_t*) + ); + cfgs[0] = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t)); + cfgs[0]->option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM; + cfgs[0]->customConfig = + reinterpret_cast(&customConfigReadBudget); + if (true != mergeAllContextConfigs( + &allContextConfigs, + cfgs, + allContextConfigs, + customConfigCountReadBudget, + contextConfigCount + customConfigCount + customConfigCountSF + customConfigCountIOMemEstimate + )) { + QNN_ERROR("Error merging custom and context configs"); + return false; + } + } + + + auto start = std::chrono::steady_clock::now(); // context Deserialization starts + + auto errCode = m_qnnInterface.contextCreateFromBinary( + m_backendHandle, + 
m_deviceHandle, + (const QnnContext_Config_t**)allContextConfigs, + (const void*)bufferVec[contextIdx].get(), + allBuffSizes[contextIdx], + &contextHandle, + nullptr // profile handle + + ); + + auto stop = std::chrono::steady_clock::now(); // context Deserialization stops + QNN_DEBUG( + "Initializing context[%lu] with %u graphs took: %lld us", + contextIdx, + graphsPerContext[contextIdx], + std::chrono::duration_cast(stop - start).count() + ); + + if(!isIOBufferMgrInitialized){ + + if (true != m_ioBufferMgr->initialize(contextHandle)) { + QNN_ERROR("qnn-htp: failure to initialize IOTensor"); + return false; + } + + isIOBufferMgrInitialized = true; + + // Calculate total allocation sizes and offset of each tensor within its allocated buffer + if (m_ioBufferMgr->allocateBuffers(m_contextAllocMap, m_tensorAllocInfo) == false){ + QNN_ERROR("Failed to allocate the Memory across the context buffers."); + return false; + } + + } + + if (errCode != QNN_SUCCESS) { + QNN_ERROR( + "Could not create context from binary for context index = %zu : err %d", + contextIdx, + (int)errCode + ); + freeGraphsInfo(&m_graphsInfo, m_graphsCount); + return false; + } + + // Clearing buffer which is deseralized to reduce Memory footprint + bufferVec[contextIdx].reset(); + + if (m_profileBackendHandle) { + extractBackendProfilingInfo(m_profileBackendHandle); + } + + m_contextVec.push_back(contextHandle); + for (int n_graph = 0; n_graph < graphCountPerContext; n_graph++) { + + uint32_t graphIdx = contextIdx*graphCountPerContext + n_graph ; + + GraphInfo_t* cur_graph = m_graphsInfo[graphIdx]; + m_contextMap[cur_graph] = contextHandle; + + if (nullptr == m_qnnInterface.graphRetrieve) { + QNN_ERROR("graphRetrieveFnHandle is nullptr."); + freeGraphsInfo(&m_graphsInfo, m_graphsCount); + return false; + } + + if (!m_graphsInfo || QNN_SUCCESS != m_qnnInterface.graphRetrieve( + contextHandle, + cur_graph->graphName, + &(cur_graph->graph) + )) { + QNN_ERROR("Unable to retrieve graph handle for graph index = %d", graphIdx); + freeGraphsInfo(&m_graphsInfo, m_graphsCount); + return false; + } + + // Register all the Tensors per graph. 
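+ // (registerTensorsWithBackend() hands each tensor's {allocation index,
+ // offset, size} triple to m_ioBufferMgr->mapFusedBufferOffset(), so the
+ // backend reads and writes the fused buffers in place.)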
+ if(false == registerTensorsWithBackend(graphIdx)){ + QNN_ERROR("Unable to MemRegister IO Tensors for graph index = %d", graphIdx); + freeGraphsInfo(&m_graphsInfo, m_graphsCount); + return false; + } + + } + + +#ifdef SPILLFILL + if (spill_fill_buffer_size > 0 && contextIdx == 0) { + first_contextHandle = contextHandle; + } +#endif + + } + + m_isContextCreated = true; + + QNN_DEBUG( + "Initialized %u graphs from %lu contexts", m_graphsCount, cachedBinariesPathVec.size() + ); + + if (true != freeContextConfigs(contextConfigs, contextConfigCount)) { + QNN_ERROR("Couldn't free context configs"); + return false; + } + if (allContextConfigs) { + free(allContextConfigs); + } + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->afterCreateFromBinary()) { + QNN_ERROR("Extensions Failure in afterCreateFromBinary()"); + return false; + } + } + + return true; +} + +#if QUALLA_QNN_API_VERSION >= 21700 +bool QnnApi::checkCapabilityOfCreateAsync(bool& propRet) { + if (nullptr == m_qnnInterface.propertyHasCapability) { + QNN_ERROR("propertyHasCapability is nullptr......."); + return false; + } + if (QNN_PROPERTY_SUPPORTED == m_qnnInterface.propertyHasCapability( + QNN_PROPERTY_CONTEXT_SUPPORT_CREATE_FROM_BINARY_LIST_ASYNC + )) { + propRet = true; + } else { + propRet = false; + } + return true; +} + +bool freeContextParams(QnnContext_Params_t** context_params_list, uint32_t numParams) { + if (context_params_list == nullptr || *context_params_list == nullptr) { + return false; + } + for (uint32_t i = 0; i < numParams; i++) { + if (nullptr != context_params_list[i]) { + delete context_params_list[i]; + } + } + return true; +} + +void QnnApi::contextNotifyFn( + Qnn_ContextHandle_t context, + Qnn_GraphHandle_t graph, + const char* graph_name, + QnnContext_createFromBinaryAsyncNotifyType_t completeType, + void* notifyParam, + Qnn_ErrorHandle_t status +) { + std::pair* pair = + reinterpret_cast*>(notifyParam); + QnnApi* QnnApi = pair->first; + uint32_t contextId = pair->second; + + if (completeType == + QnnContext_createFromBinaryAsyncNotifyType_t::QNN_CONTEXT_NOTIFY_TYPE_CONTEXT_INIT) { + QnnApi->updateContext(context, contextId); + } else if (completeType == + QnnContext_createFromBinaryAsyncNotifyType_t::QNN_CONTEXT_NOTIFY_TYPE_GRAPH_INIT) { + QnnApi->updateQnnApiGraphsandContextsInfo(graph_name, graph, contextId); + } +} + +bool QnnApi::createFromBinaryListAsync( + std::vector cachedBinariesPathVec, + ContextConfigs contextConfig, + int64_t spill_fill_buffer_size, + uint64_t mmap_budget, + bool graphSwitching, + const std::vector& execSelectGraphs, + bool loadSelectGraphs +) { + auto _start = std::chrono::steady_clock::now(); + + // Let backendExtensions populate configs + QnnContext_Config_t** customConfigs{nullptr}; + uint32_t customConfigCount{0}; + std::map> contextKeyToCustomConfigsMap; + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeCreateContextsFromBinaryList( + &contextKeyToCustomConfigsMap,&customConfigs, &customConfigCount + )) { + QNN_ERROR("Extensions Failure in beforeCreateContextsFromBinaryList()"); + return false; + } + } + + + QnnContext_Config_t** contextConfigs = nullptr; + uint32_t contextConfigCount = 0; + if (true != getContextConfigs( + &contextConfigs, + contextConfigCount, + contextConfig.priority, + graphSwitching, + execSelectGraphs, + loadSelectGraphs + )) { + QNN_ERROR("Couldn't populate context configs"); + return false; + } + + // Merge 
BE specific and agnostic configs
+ QnnContext_Config_t** allContextConfigs{nullptr};
+ if (true != mergeAllContextConfigs(
+ &allContextConfigs,
+ customConfigs,
+ contextConfigs,
+ customConfigCount,
+ contextConfigCount
+ )) {
+ QNN_ERROR("Error merging custom and context configs");
+ return false;
+ }
+
+ if (nullptr == m_qnnSystemInterface.systemContextCreate ||
+ nullptr == m_qnnSystemInterface.systemContextGetBinaryInfo ||
+ nullptr == m_qnnSystemInterface.systemContextFree) {
+ QNN_ERROR("QNN System function pointers are not populated.");
+ return false;
+ }
+
+ graphCountPerContext = getGraphCountPerContext();
+
+ // Null-terminated list of per-context parameters for the async create call
+ std::vector<QnnContext_Params_t*> context_params_list(cachedBinariesPathVec.size() + 1, nullptr);
+ std::vector<std::shared_ptr<uint8_t>> bufferVec(cachedBinariesPathVec.size());
+ // for every context's graph info
+ GraphInfo_t*** graphsInfo =
+ (GraphInfo_t***)calloc(cachedBinariesPathVec.size(), sizeof(GraphInfo_t**));
+ uint32_t graphsTotalNum = 0;
+
+ for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) {
+ auto _startPerContext = std::chrono::steady_clock::now();
+ uint64_t bufferSize{0};
+ std::shared_ptr<uint8_t>& buffer{bufferVec[contextIdx]};
+ uint32_t graphsCount;
+
+ // read serialized binary into a byte buffer
+ bufferSize = getFileSize(cachedBinariesPathVec[contextIdx]);
+ if (0 == bufferSize) {
+ QNN_ERROR(
+ "Received path to an empty file for context index = %zu. Nothing to deserialize.",
+ contextIdx
+ );
+ return false;
+ }
+
+ // inspect binary info
+ QnnSystemContext_Handle_t sysCtxHandle{nullptr};
+ if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) {
+ QNN_ERROR("Could not create system handle for context index = %zu", contextIdx);
+ return false;
+ }
+ const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
+ if (!mapAndGetContextBinaryInfo(
+ m_mmapContextBins,
+ buffer,
+ cachedBinariesPathVec[contextIdx],
+ bufferSize,
+ contextIdx,
+ graphSwitching,
+ sysCtxHandle,
+ &binaryInfo
+ )) {
+ QNN_ERROR("Failed to map context Binary.");
+ return false;
+ }
+
+ if (!copyMetadataToGraphsInfo(binaryInfo, graphsInfo[contextIdx], graphsCount)) {
+ QNN_ERROR("Failed to copy metadata for graph index = %zu", contextIdx);
+ freeGraphsInfo(&graphsInfo[contextIdx], graphsCount);
+ freeGraphsInfo(&m_graphsInfo, graphsCount);
+ return false;
+ }
+
+ if (graphCountPerContext == -1) {
+ graphCountPerContext = graphsCount;
+ graphsTotalNum = graphCountPerContext * cachedBinariesPathVec.size();
+ m_graphsInfo = (GraphInfo_t**)calloc(graphsTotalNum, sizeof(GraphInfo_t*));
+
+ } else if (graphCountPerContext != graphsCount) {
+ QNN_ERROR(
+ "Different len(graphs) found in different context files.
Found %u vs %u", + graphsCount, + graphCountPerContext + ); + freeGraphsInfo(&graphsInfo[contextIdx], graphsCount); + freeGraphsInfo(&m_graphsInfo, graphsTotalNum); + return false; + } + for (int gIdx = 0; gIdx < graphsCount; gIdx++) { + int graphIdxOfAll = contextIdx * graphsCount + gIdx; + m_graphsInfo[graphIdxOfAll] = graphsInfo[contextIdx][gIdx]; + m_graphNameToInfo[m_graphsInfo[graphIdxOfAll]->graphName] = m_graphsInfo[graphIdxOfAll]; + } + m_qnnSystemInterface.systemContextFree(sysCtxHandle); + sysCtxHandle = nullptr; + + uint32_t customConfigCountSF = 0; + + if (mmap_budget > 0) { + QnnHtpContext_CustomConfig_t customConfigReadBudget; + customConfigReadBudget.option = QNN_HTP_CONTEXT_CONFIG_OPTION_FILE_READ_MEMORY_BUDGET; + customConfigReadBudget.fileReadMemoryBudgetInMb = mmap_budget; + + QnnContext_Config_t** cfgs{nullptr}; + + uint32_t customConfigCountReadBudget = 1; + + cfgs = (QnnContext_Config_t**)malloc( + customConfigCountReadBudget * sizeof(QnnContext_Config_t*) + ); + cfgs[0] = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t)); + cfgs[0]->option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM; + cfgs[0]->customConfig = + reinterpret_cast(&customConfigReadBudget); + if (true != mergeAllContextConfigs( + &allContextConfigs, + cfgs, + allContextConfigs, + customConfigCountReadBudget, + contextConfigCount + customConfigCount + customConfigCountSF + )) { + QNN_ERROR("Error merging custom and context configs"); + return false; + } + } + + if (m_profileBackendHandle) { + extractBackendProfilingInfo(m_profileBackendHandle); + } + + // passing class QnnApi pointer into callback funtion(notifyFn) + std::pair* notifyParam = + new std::pair(this, (size_t)contextIdx); + QnnContext_Params_t* contextParam = new QnnContext_Params_t{ + .version = QNN_CONTEXT_PARAMS_VERSION_1, + .v1 = + QnnContext_ParamsV1_t{ + (const QnnContext_Config_t**)allContextConfigs, + (const void*)buffer.get(), + bufferSize, + nullptr, + QnnApi::contextNotifyFn, + (void*)notifyParam + } + }; + + context_params_list[contextIdx] = contextParam; + + auto _stop = std::chrono::steady_clock::now(); + QNN_DEBUG( + "Loading contexts[%lu] took: %lld us", + contextIdx, + std::chrono::duration_cast(_stop - _startPerContext).count() + ); + } + + if (nullptr == m_qnnInterface.contextCreateFromBinaryListAsync) { + QNN_ERROR("contextCreateFromBinaryListAsyncFnHandle is nullptr"); + freeGraphsInfo(&m_graphsInfo, graphsTotalNum); + freeContextParams(context_params_list.data(), cachedBinariesPathVec.size()); + return false; + } + + auto start = std::chrono::steady_clock::now(); + + + auto errCode = m_qnnInterface.contextCreateFromBinaryListAsync( + m_backendHandle, + m_deviceHandle, + const_cast(context_params_list.data()), + (const QnnContext_Config_t**)allContextConfigs, + nullptr + ); + + + auto stop = std::chrono::steady_clock::now(); + QNN_DEBUG( + "Initializing %lu context with %u graphs took: %lld us", + cachedBinariesPathVec.size(), + graphsTotalNum, + std::chrono::duration_cast(stop - start).count() + ); + + // Explicitly free the context binary buffers. This ensures that the lifecycle + // of the buffers outlasts the API call where their raw pointers are referenced. 
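+ // (Note: the loop below must take each element by reference; resetting a
+ // by-value copy of a std::shared_ptr would leave the vector's own reference
+ // alive and the underlying buffer still allocated.)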
+ for (auto& contextBinaryBuffer : bufferVec) {
+ QNN_DEBUG("Freeing context binary buffer @%p", contextBinaryBuffer.get());
+ contextBinaryBuffer.reset();
+ }
+
+ if (errCode != QNN_SUCCESS) {
+ QNN_ERROR(
+ "Could not create context from binary List Async for context, err %d", (int)errCode
+ );
+ freeGraphsInfo(&m_graphsInfo, graphsTotalNum);
+ freeContextParams(context_params_list.data(), cachedBinariesPathVec.size());
+ return false;
+ }
+
+ // set graphInfo in m_graphsInfo
+ for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) {
+ int contextIdxOfGraphsInfo = graphIdx / graphCountPerContext;
+ uint32_t contextIdxOfCurrGraph = m_graphNameToContextIdx[m_graphsInfo[graphIdx]->graphName];
+ m_graphsInfo[graphIdx] =
+ graphsInfo[contextIdxOfGraphsInfo][graphIdx % graphCountPerContext];
+ m_contextMap[m_graphsInfo[graphIdx]] = m_contextIdtoHandle[contextIdxOfCurrGraph];
+ }
+
+ m_isContextCreated = true;
+
+ if (true != freeContextConfigs(contextConfigs, contextConfigCount)) {
+ QNN_ERROR("Couldn't free context configs");
+ return false;
+ }
+
+ if (true != freeContextParams(context_params_list.data(), cachedBinariesPathVec.size())) {
+ QNN_ERROR("Couldn't free context params list");
+ return false;
+ }
+
+ if (allContextConfigs) {
+ free(allContextConfigs);
+ }
+
+ if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
+ if (!m_backendExtensions->interface()->afterCreateContextsFromBinaryList()) {
+ QNN_ERROR("Extensions Failure in afterCreateContextsFromBinaryList()");
+ return false;
+ }
+ }
+ return true;
+}
+#endif
+
+static std::vector<std::string> __split(std::string_view str, char delim) {
+ std::vector<std::string> split;
+
+ size_t i = 0, p = 0;
+
+ for (; i <= str.size(); ++i) {
+ if (i == str.size() || str[i] == delim) {
+ split.push_back(std::string(str.data() + p, i - p));
+ p = i + 1;
+ }
+ }
+
+ return split;
+}
+
+bool QnnApi::registerOpPackage(std::string opPackagePath) {
+ const size_t pathIdx = 0;
+ const size_t interfaceProviderIdx = 1;
+ const size_t targetIdx = 2;
+
+ auto opPackage = __split(opPackagePath, ':');
+
+ if (opPackage.size() != 2 && opPackage.size() != 3) {
+ return false;
+ }
+
+ if (nullptr == m_qnnInterface.backendRegisterOpPackage) {
+ return false;
+ }
+
+ const char* target = nullptr;
+ if (opPackage.size() == 3) {
+ target = (char*)opPackage[targetIdx].c_str();
+ }
+
+ auto returnStatus = m_qnnInterface.backendRegisterOpPackage(
+ m_backendHandle,
+ (char*)opPackage[pathIdx].c_str(),
+ (char*)opPackage[interfaceProviderIdx].c_str(),
+ target
+ );
+ if (QNN_SUCCESS != returnStatus) {
+ QNN_ERROR(
+ "Could not register OpPackage with backend due to error = %llu",
+ (unsigned long long)returnStatus
+ );
+ return false;
+ }
+
+ return true;
+}
+
+// Performance Setting for HTP
+bool QnnApi::initializePerformance() {
+
+ QnnDevice_Infrastructure_t deviceInfra = nullptr;
+ if (QNN_SUCCESS != m_qnnInterface.deviceGetInfrastructure(&deviceInfra)) {
+ QNN_ERROR("Failure in deviceGetInfrastructure()");
+ return false;
+ }
+
+ QnnHtpDevice_Infrastructure_t* htpInfra =
+ static_cast<QnnHtpDevice_Infrastructure_t*>(deviceInfra);
+ m_perfInfra = &(htpInfra->perfInfra);
+ uint32_t deviceId = 0;
+ uint32_t coreId = 0;
+ if (QNN_SUCCESS != m_perfInfra->createPowerConfigId(deviceId, coreId, &m_powerConfigId)) {
+ QNN_ERROR("Failure in createPowerConfigId()");
+ return false;
+ }
+
+ return true;
+}
+
+bool QnnApi::destroyPerformance() {
+ if (nullptr != m_perfInfra &&
+ QNN_SUCCESS != m_perfInfra->destroyPowerConfigId(m_powerConfigId)) {
+ QNN_ERROR("Failure in destroyPowerConfigId()");
+ return false;
+ } + + return true; +} + +bool QnnApi::boostPerformance() { + // Initialize the power config and select the voltage corner values for the performance setting. + QnnHtpPerfInfrastructure_PowerConfig_t powerConfig; + memset(&powerConfig, 0, sizeof(powerConfig)); + + powerConfig.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + powerConfig.dcvsV3Config.dcvsEnable = 1; + powerConfig.dcvsV3Config.setDcvsEnable = 1; + powerConfig.dcvsV3Config.contextId = m_powerConfigId; + + // refer QnnHtpPerfInfrastructure.h + powerConfig.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE; + + // Set Sleep-Disable latency parameter + powerConfig.dcvsV3Config.setSleepDisable = 0; + powerConfig.dcvsV3Config.sleepDisable = 0; + + // Set Sleep latency parameter + powerConfig.dcvsV3Config.setSleepLatency = 0; + powerConfig.dcvsV3Config.sleepLatency = 1000; // range 40-2000 micro sec + + // Set Bus Clock Parameters (refer QnnHtpPerfInfrastructure.h) + powerConfig.dcvsV3Config.setBusParams = 1; + powerConfig.dcvsV3Config.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO_PLUS; + powerConfig.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO_PLUS; + powerConfig.dcvsV3Config.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO_PLUS; + + // set Core Clock Parameters (refer QnnHtpPerfInfrastructure.h) + powerConfig.dcvsV3Config.setCoreParams = 1; + powerConfig.dcvsV3Config.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO_PLUS; + powerConfig.dcvsV3Config.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO_PLUS; + powerConfig.dcvsV3Config.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO_PLUS; + + // Set power config with different performance parameters + const QnnHtpPerfInfrastructure_PowerConfig_t* powerConfigs[] = {&powerConfig, NULL}; + if (QNN_SUCCESS != m_perfInfra->setPowerConfig(m_powerConfigId, powerConfigs)) { + QNN_ERROR("Failure in setPowerConfig() from boostPerformance"); + return false; + } + + return true; +} + +bool QnnApi::resetPerformance() { + // Initialize the power config and select the voltage corner values for the performance setting. 
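+ // Unlike boostPerformance(), which pins the bus and core corners to
+ // TURBO_PLUS, this restores NOM targets (with TURBO as the ceiling) under
+ // the power-saver DCVS mode.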
+ QnnHtpPerfInfrastructure_PowerConfig_t powerConfig; + memset(&powerConfig, 0, sizeof(powerConfig)); + + powerConfig.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + powerConfig.dcvsV3Config.dcvsEnable = 1; + powerConfig.dcvsV3Config.setDcvsEnable = 1; + powerConfig.dcvsV3Config.contextId = m_powerConfigId; + + // refer QnnHtpPerfInfrastructure.h + powerConfig.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + + // Set Sleep-Disable latency parameter + powerConfig.dcvsV3Config.setSleepDisable = 0; + powerConfig.dcvsV3Config.sleepDisable = 0; + + // Set Sleep latency parameter + powerConfig.dcvsV3Config.setSleepLatency = 0; + powerConfig.dcvsV3Config.sleepLatency = 1000; // range 40-2000 micro sec + + // Set Bus Clock Parameters (refer QnnHtpPerfInfrastructure.h) + powerConfig.dcvsV3Config.setBusParams = 1; + powerConfig.dcvsV3Config.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM; + powerConfig.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM; + powerConfig.dcvsV3Config.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO; + + // set Core Clock Parameters (refer QnnHtpPerfInfrastructure.h) + powerConfig.dcvsV3Config.setCoreParams = 1; + powerConfig.dcvsV3Config.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM; + powerConfig.dcvsV3Config.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM; + powerConfig.dcvsV3Config.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO; + + // Set power config with different performance parameters + const QnnHtpPerfInfrastructure_PowerConfig_t* powerConfigs[] = {&powerConfig, NULL}; + if (QNN_SUCCESS != m_perfInfra->setPowerConfig(m_powerConfigId, powerConfigs)) { + QNN_ERROR("Failure in setPowerConfig() from resetPerformance"); + return false; + } + + return true; +} + +bool QnnApi::initialize( + std::string backendPath, + std::vector modelPathOrCachedBinaryPathVec, + BackendExtensionsConfigs backendExtensionsConfig, + PerfProfile parsedPerfProfile, + ContextConfigs contextConfig, + std::vector graphConfigs, + bool loadFromCachedBinary, + std::string systemLibraryPath, + bool debugModeRequested, + int64_t spill_fill_buffer_size, + bool mmapContextBins, + bool asyncInit, + uint64_t mmap_budget, + bool debug_qnn, + bool graphSwitching, + const std::vector& execSelectGraphs, + bool loadSelectGraphs +) { + if (modelPathOrCachedBinaryPathVec.size() > 1 && false == loadFromCachedBinary) { + QNN_ERROR("Currently only 1 model file is supported for this framework! 
\ + Although multiple context files are supported!"); + return false; + } + + m_mmapContextBins = mmapContextBins; + + // Setting up Debug mode + m_DebugModeRequested = debugModeRequested; + if (m_DebugModeRequested) { + QNN_WARN("Warning: Debug mode set to true."); + } + + // Initialize the QNN run time + if (false == getQnnInterface(backendPath)) { + QNN_ERROR("Qnn getQnnInterface FAILED!"); + return false; + } + + if (loadFromCachedBinary) { + if (false == getQnnSystemInterface(systemLibraryPath)) { + QNN_ERROR("Qnn getQnnSystemInterface FAILED!"); + return false; + } + } else { + if (false == loadModel(modelPathOrCachedBinaryPathVec[0])) { + QNN_ERROR("Loading model FAILED!"); + return false; + } + } + + QnnLog_Level_t logLevel = QNN_LOG_LEVEL_WARN; + if (false == initializeLogging(logLevel, debug_qnn)) { + QNN_ERROR("Unable to Initialize logging in backend"); + return false; + } + + // initialize backend extensions +#ifdef QUALLA_INTERNAL_QNN_SDK + // Initialize backendExtensions only when both backend ext config and backend ext lib are provided + if (!backendExtensionsConfig.configFilePath.empty() && + false == initializeBackendExtensions( + backendExtensionsConfig, parsedPerfProfile, debug_qnn + )) { + QNN_WARN("Failure in initializing backend extensions."); + } +#else + if (false == + initializeBackendExtensions(backendExtensionsConfig, parsedPerfProfile, debug_qnn)) { + QNN_ERROR("Failure in initializing backend extensions."); + return false; + } +#endif + if (false == initializeBackend()) { + QNN_ERROR("Qnn initializeBackend FAILED!"); + return false; + } + if (false == createDevice()) { + QNN_ERROR("Device Creation failure"); + setDeviceStatus(false); + return false; + } else { + setDeviceStatus(true); + } + if (!loadFromCachedBinary) { + if (false == createContext(contextConfig)) { + QNN_ERROR("Qnn createContext FAILED!"); + return false; + } + if (false == composeGraphs(graphConfigs)) { + QNN_ERROR("composeGraphs FAILED!"); + return false; + } + if (false == finalizeGraphs()) { + QNN_ERROR("finalizeGraphs FAILED!"); + return false; + } + } else { + bool cfb_ret = false; + bool asyncCapability = false; +#if QUALLA_QNN_API_VERSION >= 21700 + if(asyncInit == true){ + if (!checkCapabilityOfCreateAsync(asyncCapability)) { + QNN_ERROR("Capabilty checked failed"); + return false; + } + asyncInit = asyncCapability && asyncInit; + } + if (asyncInit == true) { + QNN_INFO("Using create From Binary List Async"); + cfb_ret = createFromBinaryListAsync( + modelPathOrCachedBinaryPathVec, + contextConfig, + spill_fill_buffer_size, + mmap_budget, + graphSwitching, + execSelectGraphs, + loadSelectGraphs + ); + if (cfb_ret == false) { + QNN_ERROR("Create From Binary List Async FAILED!"); + return false; + } + + } else { +#endif + QNN_INFO("Using create From Binary"); + cfb_ret = createFromBinary( + modelPathOrCachedBinaryPathVec, + contextConfig, + spill_fill_buffer_size, + mmap_budget, + graphSwitching, + execSelectGraphs, + loadSelectGraphs + ); + if (false == cfb_ret) { + QNN_ERROR("Create From Binary FAILED!"); + return false; + } + } +#if QUALLA_QNN_API_VERSION >= 21700 + } +#endif + + // if (false == initializePerformance()) { + // QNN_ERROR("initialize Performance FAILED!"); + // return false; + // } + + for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) { + m_graphNameToIndex[m_graphsInfo[graphIdx]->graphName] = graphIdx; + } + +#if NSP_LOG_LEVEL > 1 + for (const auto& graphNameIndex : m_graphNameToIndex) { + QNN_DEBUG( + "Found Graph name %s corresponding to index %d", + 
graphNameIndex.first.c_str(), + graphNameIndex.second + ); + } + + fprintf(stderr, "context_handles = ["); + for (auto ctx_handle : m_contextVec) + fprintf(stderr, "%p, ", ctx_handle); + fprintf(stderr, "]\n"); +#endif + return true; +} + +bool QnnApi::initialize( + std::string backendPath, + std::string modelPath, + std::string opPackage, + ContextConfigs contextConfig, + std::vector graphConfigs, + uint32_t* inputDim, + uint32_t inputRank, + uint32_t* outputDim, + uint32_t outputRank, + uint32_t* kvDim, + uint32_t kvRank, + Qnn_Param_t* params, + uint32_t numParams, + bool debugModeRequested +) { + // Setting up Debug mode + m_DebugModeRequested = debugModeRequested; + if (m_DebugModeRequested) { + QNN_WARN("Warning: Debug mode set to true."); + } + + // Initialize the QNN run time + if (false == getQnnInterface(backendPath)) { + QNN_ERROR("Qnn getQnnInterface FAILED!"); + return false; + } + + QnnLog_Level_t logLevel = QNN_LOG_LEVEL_WARN; + if (false == initializeLogging(logLevel, false)) { + QNN_ERROR("Unable to Initialize logging in backend"); + } + + if (false == initializeBackend()) { + QNN_ERROR("Qnn initializeBackend FAILED!"); + return false; + } + + //CPU does not support createDevice. + setDeviceStatus(false); + if (false == registerOpPackage(opPackage)) { + QNN_ERROR("Qnn initializeBackend FAILED!"); + return false; + } + +// Change to 1 to enable QNN Basic profiling +#if 0 + if (false == initProfiling()) { + QNN_ERROR("Profiling init failure"); + return false; + } +#endif + if (false == loadModel(modelPath)) { + QNN_ERROR("Loading model FAILED!"); + return false; + } + if (false == createContext(contextConfig)) { + QNN_ERROR("Qnn createContext FAILED!"); + return false; + } + if (false == composeGraphs( + graphConfigs, inputDim, inputRank, outputDim, outputRank, kvDim, kvRank, params, numParams + )) { + QNN_ERROR("composeGraphs FAILED!"); + return false; + } + if (false == finalizeGraphs()) { + QNN_ERROR("finalizeGraphs FAILED!"); + return false; + } + + for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) { + m_graphNameToIndex[m_graphsInfo[graphIdx]->graphName] = graphIdx; + } +#if NSP_LOG_LEVEL > 1 + for (const auto& graphNameIndex : m_graphNameToIndex) { + QNN_DEBUG( + "Found Graph name %s corresponding to index %d", + graphNameIndex.first.c_str(), + graphNameIndex.second + ); + } +#endif + return true; +} + +bool QnnApi::graphExecute( + Qnn_Tensor_t* input, + Qnn_Tensor_t* output, + std::string graphName, + std::map>& timeLogs +) { + QnnGraph_Config_t** customGraphConfigs{nullptr}; + uint32_t configCount{0}; + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->beforeExecute( + graphName.c_str(), &customGraphConfigs, &configCount + )) { + QNN_ERROR("Extensions Failure in beforeExecute()"); + return false; + } + if (customGraphConfigs) { + if (true != setGraphConfigsBeforeExecute( + m_graphsInfo[m_graphNameToIndex[graphName]]->graph, + customGraphConfigs, + configCount + )) { + QNN_ERROR("Failure in setGraphConfigsBeforeExecute()"); + return false; + } + } + } + + // if (true != boostPerformance()) { + // QNN_ERROR("Couldn't boost the performance"); + // return false; + // } + + Qnn_ErrorHandle_t ret = QNN_GRAPH_NO_ERROR; + try { +#if NSP_LOG_LEVEL > 1 + auto start = std::chrono::steady_clock::now(); +#endif + ret = m_qnnInterface.graphExecute( + m_graphsInfo[m_graphNameToIndex[graphName]]->graph, + input, + m_graphsInfo[m_graphNameToIndex[graphName]]->numInputTensors, + output, + 
m_graphsInfo[m_graphNameToIndex[graphName]]->numOutputTensors, + m_profileBackendHandle, + nullptr + ); +#if NSP_LOG_LEVEL > 1 + auto stop = std::chrono::steady_clock::now(); + QNN_DEBUG( + "graphExecute[%s] took: %lld us", + graphName.c_str(), + std::chrono::duration_cast(stop - start).count() + ); +#endif +#if NSP_LOG_LEVEL > 6 + timeLogs[graphName].first += static_cast( + std::chrono::duration_cast(stop - start).count() + ); + timeLogs[graphName].second++; +#endif + + } catch (const std::exception& ex) { + QNN_ERROR("ERROR executing inference ret"); + } catch (...) { + QNN_ERROR("ERROR executing inference ret"); + } + + if (m_profileBackendHandle) { + extractBackendProfilingInfo(m_profileBackendHandle, timeLogs, graphName); + } + + // if (true != resetPerformance()) { + // QNN_ERROR("Couldn't reset the performance"); + // return false; + // } + + if (ret != QNN_GRAPH_NO_ERROR) return false; + + if (nullptr != m_backendExtensions && m_backendExtensions->interface()) { + if (!m_backendExtensions->interface()->afterExecute()) { + QNN_ERROR("Extensions Failure in afterExecute()"); + return false; + } + } + + return true; +} + +bool QnnApi::getTensorQuantParams( + const Qnn_Tensor_t* tensor, + std::vector& quantParamsVec +) { + bool status = false; + auto dataType = QNN_TENSOR_GET_DATA_TYPE(tensor); + auto quantParams = QNN_TENSOR_GET_QUANT_PARAMS(tensor); + if (dataType == QNN_DATATYPE_UFIXED_POINT_8 || dataType == QNN_DATATYPE_SFIXED_POINT_8 || + dataType == QNN_DATATYPE_UFIXED_POINT_16) { + auto quantEncodingType = quantParams.quantizationEncoding; + if (quantEncodingType == + Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) { + status = true; + double scale = quantParams.scaleOffsetEncoding.scale; + int32_t offset = quantParams.scaleOffsetEncoding.offset; + quantParamsVec.emplace_back(scale, offset); + } else if (quantEncodingType == + Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { + status = true; + auto encodingStruct = quantParams.axisScaleOffsetEncoding; + for (uint32_t n = 0; n < encodingStruct.numScaleOffsets; n++) { + auto scaleOffset = encodingStruct.scaleOffset[n]; + quantParamsVec.emplace_back(scaleOffset.scale, scaleOffset.offset); + } + } else { + QNN_ERROR("quant encoding type not supported"); + } + } + return status; +} + +bool QnnApi::getTensorShape(std::vector& tensorDims, const TensorWrapper& tensorWrapper) { + const Qnn_Tensor_t& tensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrapper); + if (false == + fillDims(tensorDims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor))) + return false; + + tensorDims.push_back(getDataTypeSize(QNN_TENSOR_GET_DATA_TYPE(tensor))); + return true; +} + +bool QnnApi::getTensorNameAndShape( + std::string& tensorName, + std::vector& tensorDims, + TensorWrapper& tensorWrapper +) { + Qnn_Tensor_t& tensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrapper); + tensorName = std::string(GET_TENSOR_WRAPPER_NAME(tensorWrapper)); + if (false == + fillDims(tensorDims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor))) + return false; + + tensorDims.push_back(g_qnnDataTypeToSize[QNN_TENSOR_GET_DATA_TYPE(tensor)]); + return true; +} + +bool QnnApi::extractBackendProfilingInfo( + Qnn_ProfileHandle_t profileHandle, + std::map>& timeLogs, + std::string graphName +) { + if (nullptr == m_profileBackendHandle) { + QNN_ERROR("QNN HTP Profile handle is nullptr; may not be initialized."); + return false; + } + const QnnProfile_EventId_t* profileEvents{nullptr}; + uint32_t numEvents{0}; + if 
(QNN_PROFILE_NO_ERROR != + m_qnnInterface.profileGetEvents(profileHandle, &profileEvents, &numEvents)) { + QNN_ERROR("Failure in QNN HTP profile get events."); + return false; + } + QNN_DEBUG("ProfileEvents: [%p], numEvents: [%d]", profileEvents, numEvents); + for (size_t event = 0; event < numEvents; event++) { + extractProfilingEvent(*(profileEvents + event), timeLogs, graphName); + extractProfilingSubEvents(*(profileEvents + event), timeLogs, graphName); + } + return true; +} + +bool QnnApi::extractProfilingSubEvents( + QnnProfile_EventId_t profileEventId, + std::map>& timeLogs, + std::string graphName +) { + const QnnProfile_EventId_t* profileSubEvents{nullptr}; + uint32_t numSubEvents{0}; + if (QNN_PROFILE_NO_ERROR != + m_qnnInterface.profileGetSubEvents(profileEventId, &profileSubEvents, &numSubEvents)) { + QNN_ERROR("Failure in QNN HTP profile get sub events."); + return false; + } + QNN_DEBUG("ProfileSubEvents: [%p], numSubEvents: [%d]", profileSubEvents, numSubEvents); + for (size_t subEvent = 0; subEvent < numSubEvents; subEvent++) { + extractProfilingEvent(*(profileSubEvents + subEvent), timeLogs, graphName); + extractProfilingSubEvents(*(profileSubEvents + subEvent), timeLogs, graphName); + } + return true; +} + +bool QnnApi::extractProfilingEvent( + QnnProfile_EventId_t profileEventId, + std::map>& timeLogs, + std::string graphName +) { + QnnProfile_EventData_t eventData; + if (QNN_PROFILE_NO_ERROR != m_qnnInterface.profileGetEventData(profileEventId, &eventData)) { + QNN_ERROR("Failure in profile get event type."); + return false; + } + + QNN_DEBUG( + "Event Info - Event Type: [%d], Event Value: [%lu], Event Identifier: [%s], Event Unit: [%d]", + eventData.type, + eventData.value, + eventData.identifier, + eventData.unit + ); +#if NSP_LOG_LEVEL > 6 + timeLogs[graphName + "_" + eventData.identifier].first += static_cast(eventData.value); + timeLogs[graphName + "_" + eventData.identifier].second++; +#endif + + return true; +} + +bool QnnApi::extractBackendProfilingInfo(Qnn_ProfileHandle_t profileHandle) { + if (nullptr == m_profileBackendHandle) { + QNN_ERROR("QNN HTP Profile handle is nullptr; may not be initialized."); + return false; + } + const QnnProfile_EventId_t* profileEvents{nullptr}; + uint32_t numEvents{0}; + if (QNN_PROFILE_NO_ERROR != + m_qnnInterface.profileGetEvents(profileHandle, &profileEvents, &numEvents)) { + QNN_ERROR("Failure in QNN HTP profile get events."); + return false; + } + QNN_DEBUG("ProfileEvents: [%p], numEvents: [%d]", profileEvents, numEvents); + for (size_t event = 0; event < numEvents; event++) { + extractProfilingEvent(*(profileEvents + event)); + extractProfilingSubEvents(*(profileEvents + event)); + } + return true; +} + +bool QnnApi::extractProfilingSubEvents(QnnProfile_EventId_t profileEventId) { + const QnnProfile_EventId_t* profileSubEvents{nullptr}; + uint32_t numSubEvents{0}; + if (QNN_PROFILE_NO_ERROR != + m_qnnInterface.profileGetSubEvents(profileEventId, &profileSubEvents, &numSubEvents)) { + QNN_ERROR("Failure in QNN HTP profile get sub events."); + return false; + } + QNN_DEBUG("ProfileSubEvents: [%p], numSubEvents: [%d]", profileSubEvents, numSubEvents); + for (size_t subEvent = 0; subEvent < numSubEvents; subEvent++) { + extractProfilingEvent(*(profileSubEvents + subEvent)); + extractProfilingSubEvents(*(profileSubEvents + subEvent)); + } + return true; +} + +bool QnnApi::extractProfilingEvent(QnnProfile_EventId_t profileEventId) { + QnnProfile_EventData_t eventData; + if (QNN_PROFILE_NO_ERROR != 
+        m_qnnInterface.profileGetEventData(profileEventId, &eventData)) {
+        QNN_ERROR("Failure in profile get event data.");
+        return false;
+    }
+
+    QNN_DEBUG(
+        "Event Info - Event Type: [%d], Event Value: [%lu], Event Identifier: [%s], Event Unit: [%d]",
+        eventData.type,
+        eventData.value,
+        eventData.identifier,
+        eventData.unit
+    );
+
+    return true;
+}
+
+bool QnnApi::applyBinarySection(uint32_t binIndex, std::string binSectionPath, bool useMmap, bool graphSwitch) {
+#if QUALLA_QNN_API_VERSION < 21700
+    QNN_ERROR("LoRA adapters require QNN SDK >= 2.25.1. Please update your libraries");
+    return false;
+#else
+    // assumption: splitNum starts from 0
+    QNN_DEBUG("QnnApi::applyBinarySection %u", binIndex);
+    uint32_t numAdapterGraph = 0;
+    if (nullptr == m_qnnInterface.contextApplyBinarySection) {
+        QNN_ERROR("contextApplyBinarySection interface not supported!");
+        return false;
+    }
+    if (binIndex >= m_graphsCount) {
+        QNN_ERROR("Passed split %u >= base model graph count %u", binIndex, m_graphsCount);
+        return false;
+    }
+    uint64_t bufferSize{0};
+    std::shared_ptr buffer{nullptr};
+    bufferSize = getFileSize(binSectionPath);
+
+    auto graphCountPerContext = getGraphCountPerContext();
+    if (graphCountPerContext <= 0) {
+        QNN_ERROR("graphCountPerContext is <= 0");
+        return false;
+    }
+    const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
+    QnnSystemContext_Handle_t sysCtxHandle{nullptr};
+    if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) {
+        QNN_ERROR("Could not create system handle for context index = %u", binIndex);
+        return false;
+    }
+    Qnn_ContextBinarySize_t binaryInfoSize{0};
+
+    if (m_adapterNameToBuffer[binSectionPath]) {
+        buffer = m_adapterNameToBuffer[binSectionPath];
+        if (QNN_SUCCESS != m_qnnSystemInterface.systemContextGetBinaryInfo(
+                sysCtxHandle,
+                static_cast<void*>(buffer.get()),
+                bufferSize,
+                &binaryInfo,
+                &binaryInfoSize
+            )) {
+            QNN_ERROR("Failed to get context binary info for context index = %u", binIndex);
+            return false;
+        }
+    } else {
+        if (!mapAndGetContextBinaryInfo(
+                useMmap,
+                buffer,
+                binSectionPath,
+                bufferSize,
+                binIndex,
+                graphSwitch,
+                sysCtxHandle,
+                &binaryInfo
+            )) {
+            QNN_ERROR("Failed to map context binary for contextIdx: %u", binIndex);
+            return false;
+        }
+        m_adapterNameToBuffer[binSectionPath] = buffer;
+    }
+    numAdapterGraph = getNumGraphInBinary(binaryInfo);
+    if (numAdapterGraph == 0) {
+        QNN_ERROR("numAdapterGraph is 0");
+        return false;
+    }
+    uint32_t contextId = 0;
+    uint32_t graphId = 0;
+    for (auto idx = 0; idx < numAdapterGraph; idx++) {
+        graphId = numAdapterGraph * binIndex + idx;
+        contextId = m_graphIdxToContextIdx[graphId];
+        auto contextHandle = getContextWithId(contextId);
+        auto graphHandle = m_graphsInfo[graphId]->graph;
+        if (contextHandle == nullptr || graphHandle == nullptr) {
+            QNN_ERROR("context handle or graph handle is null for patch no = %u", graphId);
+            return false;
+        }
+
+        QnnContext_Buffer_t qnnBuffer;
+        qnnBuffer.version = QNN_CONTEXT_BUFFER_VERSION_1;
+        qnnBuffer.v1.memType = QNN_CONTEXTMEMTYPE_RAW;
+        qnnBuffer.v1.binaryBuf.dataSize = bufferSize;
+        qnnBuffer.v1.binaryBuf.data = static_cast<void*>(buffer.get());
+
+        auto errorCode = m_qnnInterface.contextApplyBinarySection(
+            contextHandle,
+            graphHandle,
+            QNN_CONTEXT_SECTION_UPDATABLE,
+            &qnnBuffer,
+            nullptr,  // profile handle is null
+            nullptr   // signal handle is null
+        );
+        if (errorCode != QNN_SUCCESS) {
+            QNN_ERROR("Could not apply patch for graph = %u, errorCode = %lu", graphId, errorCode);
+            return false;
+        }
+    }
+    if (updateIOEncodings(buffer, bufferSize, numAdapterGraph * binIndex) == false) {
+        QNN_ERROR("qnn-htp: Adapter updateIOEncodings failed");
+        return false;
+    }
+    m_qnnSystemInterface.systemContextFree(sysCtxHandle);
+    sysCtxHandle = nullptr;
+    return true;
+#endif
+}
+
+bool QnnApi::updateIOEncodings(std::shared_ptr& buffer, uint64_t
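// ---------------------------------------------------------------------------
// Editor's summary, not part of the original source. applyBinarySection()
// above applies one LoRA adapter split in three steps: (1) the adapter binary
// is read, or reused from the m_adapterNameToBuffer cache, and its metadata is
// parsed through the QNN system API; (2) the updatable section is applied to
// each covered graph via contextApplyBinarySection(); (3) updateIOEncodings()
// below refreshes the cached IO quantization encodings, since an adapter may
// ship different scales/offsets than the base model. A hypothetical call site
// (path is illustrative only):
//
//   qnnApi.applyBinarySection(0, "/path/to/adapter_section.bin",
//                             /*useMmap=*/true, /*graphSwitch=*/false);
// ---------------------------------------------------------------------------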
bufferSize,uint32_t graphIndex){ + + QNN_DEBUG("Applying adapter Encodings"); + QnnSystemContext_Handle_t sysCtxHandle{nullptr}; + if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) { + QNN_ERROR("Could not create system handle for context index = %zu", graphIndex); + return false; + } + const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr}; + Qnn_ContextBinarySize_t binaryInfoSize{0}; + if (QNN_SUCCESS != m_qnnSystemInterface.systemContextGetBinaryInfo( + sysCtxHandle, + static_cast(buffer.get()), + bufferSize, + &binaryInfo, + &binaryInfoSize + )) { + QNN_ERROR("Failed to get context binary info for context index = %zu", graphIndex); + return false; + } + if (!updateMetaDataToGraphsInfo(binaryInfo, m_graphsInfo,graphIndex)) { + QNN_ERROR("Failed to copy metadata for graph index = %zu", graphIndex); + return false; + } + m_qnnSystemInterface.systemContextFree(sysCtxHandle); + sysCtxHandle = nullptr; + QNN_DEBUG(" updateIOEncodings success "); + return true; +} diff --git a/Genie/Genie/src/qualla/engines/qnn-api/QnnApi.hpp b/Genie/Genie/src/qualla/engines/qnn-api/QnnApi.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8392265aa950a95015dce233bbeb5c0456ea0665 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/QnnApi.hpp @@ -0,0 +1,429 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "BackendExtensions.hpp" +#include "QnnConfig.hpp" +#include "QnnHtpPerfInfrastructure.h" +#include "QnnHtpDevice.h" +#include "qnn-utils.hpp" +#include "IOTensor.hpp" + +#include +#include + +#define QNN_IO_TENSOR_DEBUG 0 + +enum KVManagerMode { POINTER_SHIFT = 0x0, SHIFT_CONCAT = 0x1 }; + +using qualla::QnnUtils::QuantParam; + +#define QUALLA_QNN_API_VERSION \ + (QNN_API_VERSION_MAJOR * 10000 + QNN_API_VERSION_MINOR * 100 + QNN_API_VERSION_PATCH) + +static std::map g_qnnDataTypeToSize = { + {QNN_DATATYPE_INT_8, 1}, + {QNN_DATATYPE_INT_16, 2}, + {QNN_DATATYPE_INT_32, 4}, + {QNN_DATATYPE_INT_64, 8}, + {QNN_DATATYPE_UINT_8, 1}, + {QNN_DATATYPE_UINT_16, 2}, + {QNN_DATATYPE_UINT_32, 4}, + {QNN_DATATYPE_UINT_64, 8}, + {QNN_DATATYPE_FLOAT_16, 2}, + {QNN_DATATYPE_FLOAT_32, 4}, + {QNN_DATATYPE_SFIXED_POINT_8, 1}, + {QNN_DATATYPE_SFIXED_POINT_16, 2}, + {QNN_DATATYPE_SFIXED_POINT_32, 4}, + {QNN_DATATYPE_UFIXED_POINT_8, 1}, + {QNN_DATATYPE_UFIXED_POINT_16, 2}, + {QNN_DATATYPE_UFIXED_POINT_32, 4}, + {QNN_DATATYPE_BOOL_8, 1}, +}; + +class QnnApi { + private: + const uint32_t s_graphConfigsReserveCount = 16; + + // Model vars + typedef Qnn_ErrorHandle_t (*QnnInterfaceGetProvidersFn_t)( + const QnnInterface_t*** providerList, + uint32_t* numProviders + ); + typedef Qnn_ErrorHandle_t (*QnnSystemInterfaceGetProvidersFn_t)( + const QnnSystemInterface_t*** providerList, + uint32_t* numProviders + ); + + // Graph Related Function Handle Types + typedef ModelError_t (*ComposeGraphsFnHandleType_t)( + Qnn_BackendHandle_t, + QNN_INTERFACE_VER_TYPE, + Qnn_ContextHandle_t, + const GraphConfigInfo_t**, + const uint32_t, + GraphInfo_t***, + uint32_t*, + bool, + QnnLog_Callback_t, + QnnLog_Level_t + ); + + typedef ModelError_t (*GenAIComposeGraphsFnHandleType_t)( + Qnn_BackendHandle_t, + QNN_INTERFACE_VER_TYPE, + Qnn_ContextHandle_t, + const GraphConfigInfo_t**, + const 
uint32_t, + uint32_t* inputDim, + uint32_t inputRank, + uint32_t* outputDim, + uint32_t outputRank, + uint32_t* kvDim, + uint32_t kvRank, + Qnn_Param_t* params, + uint32_t numParam, + GraphInfo_t***, + uint32_t*, + bool, + QnnLog_Callback_t, + QnnLog_Level_t + ); + + typedef ModelError_t (*FreeGraphInfoFnHandleType_t)(GraphInfo_t***, uint32_t); + + void* m_libModelHandle{nullptr}; + void* m_backendHandle{nullptr}; + void* m_backendLibraryHandle{nullptr}; + + QNN_INTERFACE_VER_TYPE m_qnnInterface{nullptr}; + QNN_SYSTEM_INTERFACE_VER_TYPE m_qnnSystemInterface{nullptr}; + std::unique_ptr m_backendExtensions{nullptr}; + ComposeGraphsFnHandleType_t m_composeGraphsFnHandle{nullptr}; + GenAIComposeGraphsFnHandleType_t m_genaiComposeGraphsFnHandle{nullptr}; + FreeGraphInfoFnHandleType_t m_freeGraphInfoFnHandle{nullptr}; + uint32_t m_backendId{0}; + Qnn_LogHandle_t m_logHandle{nullptr}; + Qnn_DeviceHandle_t m_deviceHandle{nullptr}; + + Qnn_ProfileHandle_t m_profileBackendHandle{nullptr}; + + std::vector m_contextVec; + std::unordered_map m_contextMap; + uint32_t m_graphsCount{0}; + int32_t graphCountPerContext{-1}; + GraphInfo_t** m_graphsInfo; + std::unordered_map m_graphNameToIndex; + std::unordered_map m_graphNameToInfo; + std::unordered_map m_graphNameToContextIdx; + std::unordered_map m_contextIdtoHandle; + std::mutex m_updateCallBackMutex; + + // Useful Structure for IO Esimtation + std::unordered_map m_graphtoIOMap; // stores {GraphId -> IOTensorMap} + typedef int CtxBitVector; + std::map> m_contextAllocMap; // stores {Translated ContextId -> {Tensor name, size}} + std::map> m_tensorAllocInfo; // stores {Tensor name -> (fd of RPC buffer, offset)} + std::unordered_map m_graphIdxToContextIdx; // stores {Graph Idx -> Context Idx} + std::unordered_map> m_adapterNameToBuffer; + + uint32_t m_backendConfigCount{0}; + QnnBackend_Config_t** m_backendConfigs{nullptr}; + + QnnHtpDevice_PerfInfrastructure_t* m_perfInfra{nullptr}; + uint32_t m_powerConfigId = 1; + + // Useful Structure for IO Esimtation + IOTensor* m_ioBufferMgr{nullptr}; + int32_t m_ctxSize{-1}; + int32_t m_kvDim{-1}; + bool m_loraWeightEnabled{false}; + bool m_lmHeadWeightInput{false}; + KVManagerMode m_kvUpdateMethod{POINTER_SHIFT}; + + bool m_isLogInitialized{false}; + bool m_isBackendInitialized{false}; + bool m_isContextCreated{false}; + + // Variable to keep track of debug mode + bool m_DebugModeRequested; + bool m_debugQnn{false}; + + // Variable to indicate whether to mmap context bins or read them in memory + bool m_mmapContextBins; + bool m_isDeviceCreated = false; + + std::vector> m_contextBinBuffersToBeCleared; + + void setDeviceStatus(bool status) { m_isDeviceCreated = status; } + bool getDeviceStatus() { return m_isDeviceCreated; } + bool getContextConfigs( + QnnContext_Config_t*** configs, + uint32_t& contextConfigCount, + Qnn_Priority_t contextPriority, + bool graphSwitching = false, + const std::vector& execSelectGraphs = {}, + bool loadSelectGraphs = false + ); + bool mergeAllContextConfigs( + QnnContext_Config_t*** allCustomContextConfigs, + QnnContext_Config_t** customConfigs, + QnnContext_Config_t** contextConfigs, + uint32_t customConfigCount, + uint32_t contextConfigCount + ); + bool freeContextConfigs(QnnContext_Config_t** contextConfigs, uint32_t contextConfigCount); + bool setGraphConfigsBeforeExecute( + Qnn_GraphHandle_t graphHandle, + QnnGraph_Config_t** graphConfigs, + uint32_t configCount + ); + + bool getQnnInterface(std::string backendPath); + bool getQnnSystemInterface(std::string systemLibraryPath); + 
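        // -------------------------------------------------------------------
        // Editor's sketch, not part of the original source. The two resolvers
        // above locate the provider entry point in the backend/system library;
        // the usual pattern (assuming POSIX dlopen/dlsym) looks like:
        //
        //   void* handle = dlopen(backendPath.c_str(), RTLD_NOW | RTLD_LOCAL);
        //   auto getProviders = reinterpret_cast<QnnInterfaceGetProvidersFn_t>(
        //       dlsym(handle, "QnnInterface_getProviders"));
        //   const QnnInterface_t** providers = nullptr;
        //   uint32_t numProviders = 0;
        //   getProviders(&providers, &numProviders);
        //   // then select a provider whose apiVersion matches QNN_API_VERSION
        // -------------------------------------------------------------------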
bool loadModel(std::string model_path); + bool initializeLogging(const QnnLog_Level_t& logLevel, bool debug_qnn); + void terminateLog(); + bool initializeBackendExtensions( + BackendExtensionsConfigs backendExtensionsConfig, + PerfProfile parsedPerfProfile, + bool debug_qnn + ); + bool initializeBackend(); + bool terminateBackend(); + bool createDevice(); + bool freeDevice(); + bool createContext(ContextConfigs contextConfig); + bool freeContext(); + bool composeGraphs(std::vector graphConfigs); + bool composeGraphs( + std::vector graphConfigs, + uint32_t* inputDim, + uint32_t inputRank, + uint32_t* outputDim, + uint32_t outputRank, + uint32_t* kvDim, + uint32_t kvRank, + Qnn_Param_t* params, + uint32_t numParams + ); + bool mapAndGetContextBinaryInfo( + const bool use_mmap, + std::shared_ptr& buffer, + const std::string binaryPath, + const uint64_t bufferSize, + const size_t contextIdx, + const bool graphSwitching, + QnnSystemContext_Handle_t sysCtxHandle, + const QnnSystemContext_BinaryInfo_t** binaryInfo + ); + + bool parseIOTensorsAndAccumulate(); + bool registerTensorsWithBackend(uint32_t& graphIdx); + + bool finalizeGraphs(); + bool initializePerformance(); + bool destroyPerformance(); + bool boostPerformance(); + bool resetPerformance(); + bool checkCapabilityOfCreateAsync(bool& propRet); + + bool initProfiling(); + bool extractBackendProfilingInfo( + Qnn_ProfileHandle_t profileHandle, + std::map>& timeLogs, + std::string graphName + ); + bool extractProfilingSubEvents( + QnnProfile_EventId_t profileEventId, + std::map>& timeLogs, + std::string graphName + ); + bool extractProfilingEvent( + QnnProfile_EventId_t profileEventId, + std::map>& timeLogs, + std::string graphName + ); + bool extractBackendProfilingInfo(Qnn_ProfileHandle_t profileHandle); + bool extractProfilingSubEvents(QnnProfile_EventId_t profileEventId); + bool extractProfilingEvent(QnnProfile_EventId_t profileEventId); + + Qnn_ContextHandle_t getContextWithId(uint32_t contextId) { + return m_contextIdtoHandle[contextId]; + } + + public: + QnnApi() {}; + ~QnnApi(); + + bool freeGraphs(); + static QnnApi& getInstance(); +#if QUALLA_QNN_API_VERSION >= 21700 + static void contextNotifyFn( + Qnn_ContextHandle_t context, + Qnn_GraphHandle_t graph, + const char* graph_name, + QnnContext_createFromBinaryAsyncNotifyType_t completeType, + void* notifyParam, + Qnn_ErrorHandle_t status + ); +#endif + bool createFromBinary( + std::vector cachedBinariesPathVec, + ContextConfigs contextConfig, + int64_t spill_fill_buffer_size = 0, + uint64_t mmap_budget = 0, + bool graphSwitching = false, + const std::vector& execSelectGraphs = {}, + bool loadSelectGraphs = false + ); +#if QUALLA_QNN_API_VERSION >= 21700 + bool createFromBinaryListAsync( + std::vector cachedBinariesPathVec, + ContextConfigs contextConfig, + int64_t spill_fill_buffer_size = 0, + uint64_t mmap_budget = 0, + bool graphSwitching = false, + const std::vector& execSelectGraphs = {}, + bool loadSelectGraphs = false + ); +#endif + bool initialize( + std::string backendPath, + std::vector modelPathOrCachedBinaryPathVec, + BackendExtensionsConfigs backendExtensionsConfig, + PerfProfile parsedPerfProfile = PerfProfile::BURST, + ContextConfigs contextConfig = ContextConfigs(), + std::vector graphConfigs = {}, + bool loadFromCachedBinary = false, + std::string systemLibraryPath = "", + bool debugModeRequested = false, + int64_t spill_fill_buffer_size = 0, + bool mmapContextBins = false, + bool asyncInit = true, + uint64_t mmap_budget = 0, + bool debug_qnn = false, + bool 
graphSwitching = false, + const std::vector& execSelectGraphs = {}, + bool loadSelectGraphs = false + ); + + bool registerOpPackage(std::string opPackagePath); + + void setIOTensorBufferMgr(IOTensor* ioBufferMgr){ + m_ioBufferMgr = ioBufferMgr; + } + + void setKVDim(int32_t kvDim){ + m_kvDim = kvDim; + } + + void setContextSize(int32_t ctxSize){ + m_ctxSize = ctxSize; + } + + void setKVUpdateMethod(KVManagerMode kvUpdateMethod){ + m_kvUpdateMethod = kvUpdateMethod ; + } + + std::map>* getTensorAllocInfo(){ + return &m_tensorAllocInfo; + } + + bool getLmHeadWeightInputEnabled(){ + return m_lmHeadWeightInput; + } + + bool getLoraWeightEnabled(){ + return m_loraWeightEnabled; + } + // Initalize with OpPackage + bool initialize( + std::string backendPath, + std::string modelPath, + std::string opPackage, + ContextConfigs contextConfig, + std::vector graphConfigs, + uint32_t* inputDim, + uint32_t inputRank, + uint32_t* outputDim, + uint32_t outputRank, + uint32_t* kvDim, + uint32_t kvRank, + Qnn_Param_t* params, + uint32_t numParams, + bool debugModeRequested + ); + + bool graphExecute( + Qnn_Tensor_t* input, + Qnn_Tensor_t* output, + std::string graphName, + std::map>& timeLogs + ); + + bool applyBinarySection(uint32_t binIndex, std::string binSectionPath,bool useMmap,bool graphSwitch); + + QNN_INTERFACE_VER_TYPE* getQnnInterfaceVer() { return &m_qnnInterface; }; + GraphInfo_t**& getGraphsInfo() { return m_graphsInfo; }; + uint32_t getGraphsCount() { return m_graphsCount; }; + int32_t getGraphCountPerContext() { return graphCountPerContext; } + std::vector& getContexts() { return m_contextVec; }; + const Qnn_ContextHandle_t getContexts(GraphInfo_t* const graph) { + return m_contextMap.at(graph); + }; + + void updateContext(Qnn_ContextHandle_t context, uint32_t contextId) { + std::lock_guard lock(m_updateCallBackMutex); + m_contextVec.push_back(context); + m_contextIdtoHandle[contextId] = context; + } + + void updateQnnApiGraphsandContextsInfo( + std::string graphName, + Qnn_GraphHandle_t graph, + uint32_t contextId + ) { + // set graph handle to GraphInfo + std::lock_guard lock(m_updateCallBackMutex); + m_graphNameToInfo[graphName]->graph = graph; + m_graphNameToContextIdx[graphName] = contextId; + m_graphsCount++; + } + + static inline size_t getDataTypeSize(const Qnn_DataType_t& datatype) { + return g_qnnDataTypeToSize[datatype]; + } + static inline std::string getTensorName(const TensorWrapper& tensorWrapper) { + return GET_TENSOR_WRAPPER_NAME(tensorWrapper); + } + static bool getTensorQuantParams( + const Qnn_Tensor_t* tensor, + std::vector& quantParamsVec + ); + static bool getTensorShape(std::vector& tensorDims, const TensorWrapper& tensorWrapper); + static inline Qnn_DataType_t getTensorDtype(const Qnn_Tensor_t* tensor) { + return QNN_TENSOR_GET_DATA_TYPE(tensor); + } + + bool getTensorNameAndShape( + std::string& tensorName, + std::vector& tensorDims, + TensorWrapper& tensorWrapper + ); + static void qnnLogCallback( + const char* fmt, + QnnLog_Level_t level, + uint64_t timestamp, + va_list args + ); + bool updateIOEncodings(std::shared_ptr& buffer, + uint64_t bufferSize, + uint32_t graphIndex); +}; diff --git a/Genie/Genie/src/qualla/engines/qnn-api/QnnApiUtils.cpp b/Genie/Genie/src/qualla/engines/qnn-api/QnnApiUtils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e8e71a2626a7c6e6aa213114fa310ba5450cc486 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/QnnApiUtils.cpp @@ -0,0 +1,636 @@ 
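// ---------------------------------------------------------------------------
// Editor's sketch, not part of the original diff. QnnApiUtils.cpp below owns
// the deep copy of graph and tensor metadata out of a deserialized context
// binary. A typical consumer, under the declarations in QnnApiUtils.hpp and
// with error handling elided, would look like:
//
//   GraphInfo_t** graphs = nullptr;
//   uint32_t graphCount = 0;
//   if (copyMetadataToGraphsInfo(binaryInfo, graphs, graphCount)) {
//       // ... register tensors, finalize and execute graphs ...
//       freeGraphsInfo(&graphs, graphCount);  // releases names, dims, wrappers
//   }
// ---------------------------------------------------------------------------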
+//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include "QnnApiUtils.hpp" +#include "QnnTypeMacros.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef _WIN32 + #include + #define __open ::_open + #define __strdup ::_strdup +#else + #include + #include + #define __open ::open + #define __strdup ::strdup +#endif + +bool freeQnnTensorWrapper(TensorWrapper& tensorWrapper) { + // free all pointer allocations in struct + if (nullptr != GET_TENSOR_WRAPPER_NAME(tensorWrapper)) { + free((void*)GET_TENSOR_WRAPPER_NAME(tensorWrapper)); + } + + Qnn_Tensor_t& tensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrapper); + free(QNN_TENSOR_GET_DIMENSIONS(tensor)); + return true; +} + +bool freeQnnTensorWrappers(TensorWrapper*& tensorWrappers, uint32_t numTensors) { + // free all pointer allocations in struct + for (size_t i = 0; i < numTensors; i++) { + freeQnnTensorWrapper(tensorWrappers[i]); + } + free(tensorWrappers); + + return true; +} + +bool freeGraphsInfo(GraphInfoPtr_t** graphsInfo, uint32_t numGraphs) { + if (graphsInfo == nullptr || *graphsInfo == nullptr) { + return false; + } + for (uint32_t i = 0; i < numGraphs; i++) { + if (nullptr != (*graphsInfo)[i]) { + free((*graphsInfo)[i]->graphName); + freeQnnTensorWrappers( + (*graphsInfo)[i]->inputTensors, (*graphsInfo)[i]->numInputTensors + ); + freeQnnTensorWrappers( + (*graphsInfo)[i]->outputTensors, (*graphsInfo)[i]->numOutputTensors + ); + } + } + free(**graphsInfo); + free(*graphsInfo); + *graphsInfo = nullptr; + + return true; +} + +bool freeGraphInfo(GraphInfo_t* graphInfo) { + if (graphInfo == nullptr) { + return false; + } + if (nullptr != graphInfo->graphName) { + free(graphInfo->graphName); + } + freeQnnTensorWrappers(graphInfo->inputTensors, graphInfo->numInputTensors); + freeQnnTensorWrappers(graphInfo->outputTensors, graphInfo->numOutputTensors); + free(graphInfo); + return true; +} + +bool updateTensorInfo(const Qnn_Tensor_t* tensorsInfoSrc, + TensorWrapper* tensorWrappers, + uint32_t tensorsCount +){ + for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) { + QNN_DEBUG("Extracting tensorInfo for tensor Idx: %d", (int)tIdx); + Qnn_Tensor_t& tensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrappers[tIdx]); + + QNN_TENSOR_SET_ID(tensor, QNN_TENSOR_GET_ID(&tensorsInfoSrc[tIdx])); + QNN_TENSOR_SET_TYPE(tensor, QNN_TENSOR_GET_TYPE(&tensorsInfoSrc[tIdx])); + QNN_TENSOR_SET_DATA_FORMAT(tensor, QNN_TENSOR_GET_DATA_FORMAT(&tensorsInfoSrc[tIdx])); + QNN_TENSOR_SET_DATA_TYPE(tensor, QNN_TENSOR_GET_DATA_TYPE(&tensorsInfoSrc[tIdx])); + Qnn_QuantizeParams_t qParams = QNN_QUANTIZE_PARAMS_INIT; + qParams.encodingDefinition = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).encodingDefinition; + qParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED; + if (QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) { + qParams.quantizationEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding; + qParams.scaleOffsetEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).scaleOffsetEncoding; + } else if (QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { 
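        // Editor's note (illustration, not original source): unlike the single
        // scale/offset case above, an axis (per-channel) encoding carries one
        // Qnn_ScaleOffset_t per slice along `axis`, e.g. per output channel of
        // a conv weight, so real[c] = scaleOffset[c].scale * (q + scaleOffset[c].offset).
        // The branch below therefore heap-allocates and element-copies the
        // scaleOffset array instead of aliasing the source pointer, keeping the
        // wrapper valid after the source binary info is freed.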
+ qParams.quantizationEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding; + qParams.axisScaleOffsetEncoding.axis = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.axis; + qParams.axisScaleOffsetEncoding.numScaleOffsets = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets; + if (QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets > 0) { + qParams.axisScaleOffsetEncoding.scaleOffset = (Qnn_ScaleOffset_t*)malloc( + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets * + sizeof(Qnn_ScaleOffset_t) + ); + if (qParams.axisScaleOffsetEncoding.scaleOffset) { + for (size_t idx = 0; + idx < QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets; + idx++) { + qParams.axisScaleOffsetEncoding.scaleOffset[idx].scale = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.scaleOffset[idx] + .scale; + qParams.axisScaleOffsetEncoding.scaleOffset[idx].offset = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.scaleOffset[idx] + .offset; + } + } + } + } + QNN_TENSOR_SET_QUANT_PARAMS(tensor, qParams); + QNN_TENSOR_SET_RANK(tensor, QNN_TENSOR_GET_RANK(&tensorsInfoSrc[tIdx])); + if (QNN_TENSOR_GET_RANK(tensorsInfoSrc[tIdx]) > 0) { + if (QNN_TENSOR_GET_DIMENSIONS(tensor)) { + memcpy(QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_DIMENSIONS(&tensorsInfoSrc[tIdx]), + QNN_TENSOR_GET_RANK(&tensorsInfoSrc[tIdx]) * sizeof(uint32_t)); + } + } + } + return true; +} + +bool copyTensorsInfo( + const Qnn_Tensor_t* tensorsInfoSrc, + TensorWrapper*& tensorWrappers, + uint32_t tensorsCount +) { + + auto returnStatus = true; + tensorWrappers = (TensorWrapper*)calloc(tensorsCount, sizeof(TensorWrapper)); + if (nullptr == tensorWrappers) { + QNN_ERROR("Failed to allocate memory for tensorWrappers."); + return false; + } + if (returnStatus) { + for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) { + // QNN_DEBUG("Extracting tensorInfo for tensor Idx: %d", (int)tIdx); + Qnn_Tensor_t& tensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrappers[tIdx]); + tensor = QNN_TENSOR_INIT; + + const char* tensorName = QNN_TENSOR_GET_NAME(&tensorsInfoSrc[tIdx]); + if (!tensorName) { + QNN_TENSOR_SET_NAME(tensor, nullptr); + } else { + QNN_TENSOR_SET_NAME(tensor, __strdup(tensorName)); + } + + QNN_TENSOR_SET_ID(tensor, QNN_TENSOR_GET_ID(&tensorsInfoSrc[tIdx])); + QNN_TENSOR_SET_TYPE(tensor, QNN_TENSOR_GET_TYPE(&tensorsInfoSrc[tIdx])); + QNN_TENSOR_SET_DATA_FORMAT(tensor, QNN_TENSOR_GET_DATA_FORMAT(&tensorsInfoSrc[tIdx])); + QNN_TENSOR_SET_DATA_TYPE(tensor, QNN_TENSOR_GET_DATA_TYPE(&tensorsInfoSrc[tIdx])); + Qnn_QuantizeParams_t qParams = QNN_QUANTIZE_PARAMS_INIT; + qParams.encodingDefinition = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).encodingDefinition; + qParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED; + if (QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) { + qParams.quantizationEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding; + qParams.scaleOffsetEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).scaleOffsetEncoding; + } else if (QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { + qParams.quantizationEncoding = + 
QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]).quantizationEncoding; + qParams.axisScaleOffsetEncoding.axis = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.axis; + qParams.axisScaleOffsetEncoding.numScaleOffsets = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets; + if (QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets > 0) { + qParams.axisScaleOffsetEncoding.scaleOffset = (Qnn_ScaleOffset_t*)malloc( + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets * + sizeof(Qnn_ScaleOffset_t) + ); + if (qParams.axisScaleOffsetEncoding.scaleOffset) { + for (size_t idx = 0; + idx < QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.numScaleOffsets; + idx++) { + qParams.axisScaleOffsetEncoding.scaleOffset[idx].scale = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.scaleOffset[idx] + .scale; + qParams.axisScaleOffsetEncoding.scaleOffset[idx].offset = + QNN_TENSOR_GET_QUANT_PARAMS(&tensorsInfoSrc[tIdx]) + .axisScaleOffsetEncoding.scaleOffset[idx] + .offset; + } + } + } + } + QNN_TENSOR_SET_QUANT_PARAMS(tensor, qParams); + QNN_TENSOR_SET_RANK(tensor, QNN_TENSOR_GET_RANK(&tensorsInfoSrc[tIdx])); + QNN_TENSOR_SET_DIMENSIONS(tensor, nullptr); + if (QNN_TENSOR_GET_RANK(tensorsInfoSrc[tIdx]) > 0) { + QNN_TENSOR_SET_DIMENSIONS( + tensor, + (uint32_t*)malloc( + QNN_TENSOR_GET_RANK(&tensorsInfoSrc[tIdx]) * sizeof(uint32_t) + ) + ); + if (QNN_TENSOR_GET_DIMENSIONS(tensor)) { + memcpy(QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_DIMENSIONS(&tensorsInfoSrc[tIdx]), + QNN_TENSOR_GET_RANK(&tensorsInfoSrc[tIdx]) * sizeof(uint32_t)); + } + } + } + } + + return returnStatus; +} + + +bool updateGraphInfoV1(const QnnSystemContext_GraphInfoV1_t* graphInfoSrc, + GraphInfo_t* graphInfoDst +){ + if (graphInfoSrc->graphInputs) { + if (!updateTensorInfo( + graphInfoSrc->graphInputs, + graphInfoDst->inputTensors, + graphInfoSrc->numGraphInputs + )) { + return false; + } + } + if (graphInfoSrc->graphOutputs) { + if (!updateTensorInfo( + graphInfoSrc->graphOutputs, + graphInfoDst->outputTensors, + graphInfoSrc->numGraphOutputs + )) { + return false; + } + } + return true; +} + + +bool updateGraphInfoV3(const QnnSystemContext_GraphInfoV3_t* graphInfoSrc, + GraphInfo_t* graphInfoDst +){ + if (graphInfoSrc->graphInputs) { + if (!updateTensorInfo( + graphInfoSrc->graphInputs, + graphInfoDst->inputTensors, + graphInfoSrc->numGraphInputs + )) { + return false; + } + } + if (graphInfoSrc->graphOutputs) { + if (!updateTensorInfo( + graphInfoSrc->graphOutputs, + graphInfoDst->outputTensors, + graphInfoSrc->numGraphOutputs + )) { + return false; + } + } + return true; +} + +bool copyGraphsInfoV1( + const QnnSystemContext_GraphInfoV1_t* graphInfoSrc, + GraphInfo_t* graphInfoDst +) { + graphInfoDst->graphName = nullptr; + if (graphInfoSrc->graphName) { + graphInfoDst->graphName = __strdup(graphInfoSrc->graphName); + } + graphInfoDst->inputTensors = nullptr; + graphInfoDst->numInputTensors = 0; + if (graphInfoSrc->graphInputs) { + if (!copyTensorsInfo( + graphInfoSrc->graphInputs, + graphInfoDst->inputTensors, + graphInfoSrc->numGraphInputs + )) { + return false; + } + graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs; + } + graphInfoDst->outputTensors = nullptr; + graphInfoDst->numOutputTensors = 0; + if (graphInfoSrc->graphOutputs) { + if (!copyTensorsInfo( + graphInfoSrc->graphOutputs, + 
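                // Editor's note (illustration, not original source):
                // copyTensorsInfo() gives the destination wrappers their own
                // allocations (names via __strdup, dimension arrays via
                // malloc), so a GraphInfo_t outlives the system-context handle
                // that produced the source. For example, a wrapper name stays
                // valid even after systemContextFree():
                //
                //   const char* n = GET_TENSOR_WRAPPER_NAME(graphInfoDst->inputTensors[0]);
                //
                // The matching teardown is freeQnnTensorWrappers()/freeGraphsInfo().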
graphInfoDst->outputTensors, + graphInfoSrc->numGraphOutputs + )) { + return false; + } + graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs; + } + return true; +} + +bool copyGraphsInfoV3(const QnnSystemContext_GraphInfoV3_t *graphInfoSrc, + GraphInfo_t *graphInfoDst) { + graphInfoDst->graphName = nullptr; + if (graphInfoSrc->graphName) { + graphInfoDst->graphName = + __strdup(graphInfoSrc->graphName); + } + graphInfoDst->inputTensors = nullptr; + graphInfoDst->numInputTensors = 0; + if (graphInfoSrc->graphInputs) { + if (!copyTensorsInfo( + graphInfoSrc->graphInputs, graphInfoDst->inputTensors, graphInfoSrc->numGraphInputs)) { + return false; + } + graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs; + } + graphInfoDst->outputTensors = nullptr; + graphInfoDst->numOutputTensors = 0; + if (graphInfoSrc->graphOutputs) { + if (!copyTensorsInfo(graphInfoSrc->graphOutputs, + graphInfoDst->outputTensors, + graphInfoSrc->numGraphOutputs)) { + return false; + } + graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs; + } + return true; +} + +bool updateGraphInfo(const QnnSystemContext_GraphInfo_t* graphsInput, + const uint32_t numGraphs, + GraphInfo_t** graphsInfo, + uint32_t& graphsCount +){ + + for (size_t gIdx = 0; gIdx < numGraphs; gIdx++) { + if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) { + if(updateGraphInfoV1(&graphsInput[gIdx].graphInfoV1, graphsInfo[graphsCount]) == false) { + return false; + } + } + if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) { + if(updateGraphInfoV3(&graphsInput[gIdx].graphInfoV3, graphsInfo[graphsCount]) == false) { + return false; + } + } + graphsCount++; + } + return true; +} + + +bool copyGraphsInfo( + const QnnSystemContext_GraphInfo_t* graphsInput, + const uint32_t numGraphs, + GraphInfo_t**& graphsInfo +) { + + if (!graphsInput) { + QNN_ERROR("Received nullptr for graphsInput."); + return false; + } + auto returnStatus = true; + graphsInfo = (GraphInfo_t**)calloc(numGraphs, sizeof(GraphInfo_t*)); + GraphInfo_t* graphInfoArr = (GraphInfo_t*)calloc(numGraphs, sizeof(GraphInfo_t)); + if (nullptr == graphsInfo || nullptr == graphInfoArr) { + QNN_ERROR("Failure to allocate memory for *graphInfo"); + returnStatus = false; + } + if (true == returnStatus) { + for (size_t gIdx = 0; gIdx < numGraphs; gIdx++) { + QNN_DEBUG("Extracting graphsInfo for graph Idx: %d", (int)gIdx); + if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) { + copyGraphsInfoV1(&graphsInput[gIdx].graphInfoV1, &graphInfoArr[gIdx]); + } + if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) { + copyGraphsInfoV3(&graphsInput[gIdx].graphInfoV3, &graphInfoArr[gIdx]); + } + graphsInfo[gIdx] = graphInfoArr + gIdx; + } + } + if (true != returnStatus) { + QNN_DEBUG("Received an ERROR during extractGraphsInfo. 
Freeing resources."); + if (graphsInfo) { + for (uint32_t gIdx = 0; gIdx < numGraphs; gIdx++) { + if (graphsInfo[gIdx]) { + if (nullptr != graphsInfo[gIdx]->graphName) { + free(graphsInfo[gIdx]->graphName); + graphsInfo[gIdx]->graphName = nullptr; + } + freeQnnTensorWrappers( + graphsInfo[gIdx]->inputTensors, graphsInfo[gIdx]->numInputTensors + ); + freeQnnTensorWrappers( + graphsInfo[gIdx]->outputTensors, graphsInfo[gIdx]->numOutputTensors + ); + } + } + free(*graphsInfo); + } + free(graphsInfo); + graphsInfo = nullptr; + } + + return true; +} + +uint32_t getNumGraphInBinary(const QnnSystemContext_BinaryInfo_t* binaryInfo) +{ + uint32_t numGraph = 0; + if (nullptr == binaryInfo) { + QNN_ERROR("binaryInfo is nullptr."); + return false; + } + if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) { + numGraph = binaryInfo->contextBinaryInfoV1.numGraphs; + }else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) { + numGraph = binaryInfo->contextBinaryInfoV2.numGraphs; + } + else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) { + numGraph = binaryInfo->contextBinaryInfoV3.numGraphs; + } + return numGraph; +} + +bool updateMetaDataToGraphsInfo(const QnnSystemContext_BinaryInfo_t* binaryInfo, + GraphInfo_t** graphsInfo, + uint32_t& graphsCount +){ + if (nullptr == binaryInfo) { + QNN_ERROR("binaryInfo is nullptr."); + return false; + } + if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) { + if (binaryInfo->contextBinaryInfoV1.graphs) { + if (!updateGraphInfo( + binaryInfo->contextBinaryInfoV1.graphs, + binaryInfo->contextBinaryInfoV1.numGraphs, + graphsInfo, + graphsCount + )) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + return true; + } + } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) { + if (binaryInfo->contextBinaryInfoV2.graphs) { + if (!updateGraphInfo( + binaryInfo->contextBinaryInfoV2.graphs, + binaryInfo->contextBinaryInfoV2.numGraphs, + graphsInfo, + graphsCount + )) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + return true; + } + } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) { + if (binaryInfo->contextBinaryInfoV3.graphs) { + if (!updateGraphInfo( + binaryInfo->contextBinaryInfoV3.graphs, + binaryInfo->contextBinaryInfoV3.numGraphs, + graphsInfo, + graphsCount + )) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + return true; + } + } + QNN_ERROR("Unrecognized system context binary info version."); + return false; +} + +bool copyMetadataToGraphsInfo( + const QnnSystemContext_BinaryInfo_t* binaryInfo, + GraphInfo_t**& graphsInfo, + uint32_t& graphsCount +) { + if (nullptr == binaryInfo) { + QNN_ERROR("binaryInfo is nullptr."); + return false; + } + graphsCount = 0; + if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) { + if (binaryInfo->contextBinaryInfoV1.graphs) { + if (!copyGraphsInfo( + binaryInfo->contextBinaryInfoV1.graphs, + binaryInfo->contextBinaryInfoV1.numGraphs, + graphsInfo + )) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + graphsCount = binaryInfo->contextBinaryInfoV1.numGraphs; + return true; + } + } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) { + if (binaryInfo->contextBinaryInfoV2.graphs) { + if (!copyGraphsInfo( + binaryInfo->contextBinaryInfoV2.graphs, + binaryInfo->contextBinaryInfoV2.numGraphs, + graphsInfo + )) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + 
+            }
+            graphsCount = binaryInfo->contextBinaryInfoV2.numGraphs;
+            return true;
+        }
+    } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
+        if (binaryInfo->contextBinaryInfoV3.graphs) {
+            if (!copyGraphsInfo(binaryInfo->contextBinaryInfoV3.graphs,
+                                binaryInfo->contextBinaryInfoV3.numGraphs,
+                                graphsInfo)) {
+                QNN_ERROR("Failed while copying graphs info.");
+                return false;
+            }
+            graphsCount = binaryInfo->contextBinaryInfoV3.numGraphs;
+            return true;
+        }
+    }
+    QNN_ERROR("Unrecognized system context binary info version.");
+    return false;
+}
+
+size_t getFileSize(std::string filePath) {
+    std::ifstream in(filePath, std::ifstream::binary);
+    if (!in) {
+        QNN_ERROR("Failed to open input file: %s", filePath.c_str());
+        return 0;
+    }
+    in.seekg(0, in.end);
+    const size_t length = in.tellg();
+    in.seekg(0, in.beg);
+    return length;
+}
+
+bool readBinaryFromFile(std::string filePath, void* buffer, size_t bufferSize) {
+    if (nullptr == buffer) {
+        QNN_ERROR("buffer is nullptr");
+        return false;
+    }
+    std::ifstream in(filePath, std::ifstream::binary);
+    if (!in) {
+        QNN_ERROR("Failed to open input file: %s", filePath.c_str());
+        return false;
+    }
+    if (!in.read(reinterpret_cast<char*>(buffer), bufferSize)) {
+        QNN_ERROR("Failed to read the contents of: %s", filePath.c_str());
+        return false;
+    }
+    return true;
+}
+
+bool mmapBinaryFile(std::string filePath, void** buffer, size_t bufferSize) {
+#ifndef _WIN32
+    int fd = open(filePath.c_str(), O_RDONLY);
+    if (fd < 0) {
+        QNN_ERROR("Failed to open input file: %s err: %s", filePath.c_str(), strerror(errno));
+        return false;
+    }
+    int OFFSET = 0;
+
+    // read the binary file as a memory map
+    *buffer = mmap(nullptr, bufferSize, PROT_READ, MAP_PRIVATE, fd, OFFSET);
+    close(fd);
+    if (MAP_FAILED == *buffer) {
+        QNN_ERROR("Failed to mmap file: %s err: %s", filePath.c_str(), strerror(errno));
+        return false;
+    }
+    if (madvise(*buffer, bufferSize, MADV_NOHUGEPAGE)) {
+        QNN_ERROR("Failed to advise OS on memory usage err: %s", strerror(errno));
+    }
+
+    return true;
+#else
+    return false;
+#endif
+}
+
+bool fillDims(std::vector& dims, uint32_t* inDimensions, uint32_t rank) {
+    if (nullptr == inDimensions) {
+        QNN_ERROR("input dimensions is nullptr");
+        return false;
+    }
+
+    if (rank < 1) {
+        QNN_ERROR("invalid rank : %d", rank);
+        return false;
+    }
+
+    // If rank is less than 4, left-pad the shape with 1s
+    // (looping from rank avoids unsigned underflow when rank > 4)
+    for (size_t r = rank; r < 4; r++) {
+        dims.push_back(1);
+    }
+
+    for (size_t r = 0; r < rank; r++) {
+        dims.push_back(inDimensions[r]);
+    }
+
+    return true;
+}
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/QnnApiUtils.hpp b/Genie/Genie/src/qualla/engines/qnn-api/QnnApiUtils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..71ccaf9610ce5707c00a627350fe38559372263f
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/QnnApiUtils.hpp
@@ -0,0 +1,94 @@
+//==============================================================================
+//
+//  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+//  All Rights Reserved.
+//  Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#include "QnnInterface.h"
+#include "QnnTypes.h"
+#include "System/QnnSystemInterface.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "QnnTypeDef.hpp"
+#include "Log.hpp"
+
+/**
+ * @brief Frees all memory allocated for tensor attributes.
+ *
+ * @param[in] tensorWrapper tensor object to free
+ *
+ * @return Error code
+ */
+bool freeQnnTensorWrapper(TensorWrapper& tensorWrapper);
+
+/**
+ * @brief Loops through and frees all memory allocated for tensor attributes of each tensorWrapper
+ * object.
+ * + * @param[in] tensorWrappers array of tensor objects to free + * + * @param[in] numTensors length of the above tensorWrappers array + * + * @return Error code + */ +bool freeQnnTensorWrappers(TensorWrapper*& tensorWrappers, uint32_t numTensors); + +/** + * @brief A helper function to free memory malloced for communicating the Graph for a model(s) + * + * @param[in] graphsInfo Pointer pointing to location of graph objects + * + * @param[in] numGraphs The number of graph objects the above pointer is pointing to + * + * @return Error code + * + */ +bool freeGraphsInfo(GraphInfoPtr_t** graphsInfo, uint32_t numGraphs); + +bool freeGraphInfo(GraphInfo_t* graphInfo); + +bool copyMetadataToGraphsInfo( + const QnnSystemContext_BinaryInfo_t* binaryInfo, + GraphInfo_t**& graphsInfo, + uint32_t& graphsCount +); + +bool copyGraphsInfo( + const QnnSystemContext_GraphInfo_t* graphsInput, + const uint32_t numGraphs, + GraphInfo_t**& graphsInfo +); + +bool copyGraphsInfoV1( + const QnnSystemContext_GraphInfoV1_t* graphInfoSrc, + GraphInfo_t* graphInfoDst +); + +bool copyTensorsInfo( + const Qnn_Tensor_t* tensorsInfoSrc, + TensorWrapper*& tensorWrappers, + uint32_t tensorsCount +); + +bool fillDims(std::vector& dims, uint32_t* inDimensions, uint32_t rank); +size_t getFileSize(std::string filePath); +bool readBinaryFromFile(std::string filePath, void* buffer, size_t bufferSize); +bool mmapBinaryFile(std::string filePath, void** buffer, size_t bufferSize); +bool updateMetaDataToGraphsInfo(const QnnSystemContext_BinaryInfo_t* binaryInfo,GraphInfo_t** graphsInfo,uint32_t& graphsCount); +bool updateGraphInfo(const QnnSystemContext_GraphInfo_t* graphsInput, + const uint32_t currCount, + GraphInfo_t* graphsInfo); +bool updateGraphInfoV1(const QnnSystemContext_GraphInfoV1_t* graphInfoSrc, + GraphInfo_t* graphInfoDst); +bool updateTensorInfo(const Qnn_Tensor_t* tensorsInfoSrc, + TensorWrapper* tensorWrappers, + uint32_t tensorsCount); +uint32_t getNumGraphInBinary(const QnnSystemContext_BinaryInfo_t* binaryInfo); \ No newline at end of file diff --git a/Genie/Genie/src/qualla/engines/qnn-api/QnnConfig.hpp b/Genie/Genie/src/qualla/engines/qnn-api/QnnConfig.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1690c589197a00352fe29fa969b86c19a5839677 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/QnnConfig.hpp @@ -0,0 +1,44 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#pragma once + +#include "QnnGraph.h" +#include "QnnTypes.h" +#include + +struct BackendExtensionsConfigs { + std::string sharedLibraryPath; + std::string configFilePath; + BackendExtensionsConfigs() : sharedLibraryPath(""), configFilePath("") {} + BackendExtensionsConfigs(std::string sharedLibraryPath, std::string configFilePath) + : sharedLibraryPath(sharedLibraryPath), configFilePath(configFilePath) {} +}; + +struct ContextConfigs { + bool priorityPresent; + Qnn_Priority_t priority; + ContextConfigs() : priorityPresent(false), priority(QNN_PRIORITY_UNDEFINED) {} + ContextConfigs(Qnn_Priority_t priority) : priorityPresent(true), priority(priority) {} +}; + +struct GraphConfigs { + std::string graphName; + bool priorityPresent; + Qnn_Priority_t priority; + GraphConfigs() + : graphName(), + priorityPresent(false), priority(QNN_PRIORITY_UNDEFINED) { + } +}; + +struct ConfigOptions { + BackendExtensionsConfigs backendExtensionsConfigs; + ContextConfigs contextConfigs; + std::vector graphConfigs; + ConfigOptions() : backendExtensionsConfigs(), contextConfigs(), graphConfigs() {} +}; diff --git a/Genie/Genie/src/qualla/engines/qnn-api/QnnTypeDef.hpp b/Genie/Genie/src/qualla/engines/qnn-api/QnnTypeDef.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a6ed57fc8bea91e957ee66f7d9918e6d1bb78cf3 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/QnnTypeDef.hpp @@ -0,0 +1,52 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#ifndef QNN_TYPE_DEF_H_ +#define QNN_TYPE_DEF_H_ + +#include "QnnInterface.h" +#include "QnnTypes.h" +#include "Log.hpp" +#include "QnnTypeMacros.hpp" + +typedef enum ModelError { + MODEL_NO_ERROR = 0, + MODEL_TENSOR_ERROR = 1, + MODEL_PARAMS_ERROR = 2, + MODEL_NODES_ERROR = 3, + MODEL_GRAPH_ERROR = 4, + MODEL_CONTEXT_ERROR = 5, + MODEL_GENERATION_ERROR = 6, + MODEL_SETUP_ERROR = 7, + MODEL_INVALID_ARGUMENT_ERROR = 8, + MODEL_FILE_ERROR = 9, + MODEL_MEMORY_ALLOCATE_ERROR = 10, + // Value selected to ensure 32 bits. + MODEL_UNKNOWN_ERROR = 0x7FFFFFFF +} ModelError_t; + +using TensorWrapper = Qnn_Tensor_t; + #define GET_TENSOR_WRAPPER_TENSOR(tensorWrapper) tensorWrapper + #define GET_TENSOR_WRAPPER_NAME(tensorWrapper) QNN_TENSOR_GET_NAME(tensorWrapper) + +typedef struct GraphInfo { + Qnn_GraphHandle_t graph; + char* graphName; + TensorWrapper* inputTensors; + uint32_t numInputTensors; + TensorWrapper* outputTensors; + uint32_t numOutputTensors; +} GraphInfo_t; +typedef GraphInfo_t* GraphInfoPtr_t; + +typedef struct GraphConfigInfo { + char* graphName; + const QnnGraph_Config_t** graphConfigs; +} GraphConfigInfo_t; + +#endif // QNN_TYPE_DEF_H_ diff --git a/Genie/Genie/src/qualla/engines/qnn-api/QnnTypeMacros.hpp b/Genie/Genie/src/qualla/engines/qnn-api/QnnTypeMacros.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cc0548e07031a1a1dae7010164018be3671a51f1 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/QnnTypeMacros.hpp @@ -0,0 +1,702 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. 
+// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "QnnTypes.h" + +#define QNN_OP_CFG_VALID(opConfig) ((opConfig).version == QNN_OPCONFIG_VERSION_1) + +inline Qnn_OpConfig_t createQnnOpConfig(const Qnn_OpConfigVersion_t version) { + Qnn_OpConfig_t opConfig = QNN_OPCONFIG_INIT; + opConfig.version = version; + if (version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1 = QNN_OPCONFIG_V1_INIT; + } + return opConfig; +} + +inline const char* getQnnOpConfigName(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.name; + } + return NULL; +} + +inline const char* getQnnOpConfigName(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigName(*opConfig); +} + +inline const char* getQnnOpConfigPackageName(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.packageName; + } + return NULL; +} + +inline const char* getQnnOpConfigPackageName(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigPackageName(*opConfig); +} + +inline const char* getQnnOpConfigTypeName(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.typeName; + } + return NULL; +} + +inline const char* getQnnOpConfigTypeName(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigTypeName(*opConfig); +} + +inline uint32_t getQnnOpConfigNumParams(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfParams; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumParams(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigNumParams(*opConfig); +} + +inline const Qnn_Param_t* getQnnOpConfigParams(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.params; + } + return NULL; +} + +inline const Qnn_Param_t* getQnnOpConfigParams(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigParams(*opConfig); +} + +inline uint32_t getQnnOpConfigNumInputs(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfInputs; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumInputs(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigNumInputs(*opConfig); +} + +inline const Qnn_Tensor_t* getQnnOpConfigInputs(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.inputTensors; + } + return NULL; +} + +inline const Qnn_Tensor_t* getQnnOpConfigInputs(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigInputs(*opConfig); +} + +inline uint32_t getQnnOpConfigNumOutputs(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfOutputs; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumOutputs(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigNumOutputs(*opConfig); +} + +inline const Qnn_Tensor_t* getQnnOpConfigOutputs(const Qnn_OpConfig_t& opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.outputTensors; + } + return NULL; +} + +inline const Qnn_Tensor_t* getQnnOpConfigOutputs(const Qnn_OpConfig_t* const opConfig) { + return getQnnOpConfigOutputs(*opConfig); +} + +inline void setQnnOpConfigName(Qnn_OpConfig_t& opConfig, const char* const name) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { 
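        // Editor's note (illustration, not original source): every setter in
        // this header checks the struct's version tag before touching the
        // versioned union member, so callers stay version-agnostic. Typical
        // construction through the macros defined at the end of this file:
        //
        //   Qnn_OpConfig_t op = QNN_OP_CFG_CREATE(QNN_OPCONFIG_VERSION_1);
        //   QNN_OP_CFG_SET_NAME(op, "matmul_0");     // pointers are not copied;
        //   QNN_OP_CFG_SET_TYPE_NAME(op, "MatMul");  // storage must outlive op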
+ opConfig.v1.name = name; + } +} + +inline void setQnnOpConfigName(Qnn_OpConfig_t* const opConfig, const char* const name) { + setQnnOpConfigName(*opConfig, name); +} + +inline void setQnnOpConfigPackageName(Qnn_OpConfig_t& opConfig, const char* const packageName) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.packageName = packageName; + } +} + +inline void setQnnOpConfigPackageName( + Qnn_OpConfig_t* const opConfig, + const char* const packageName +) { + setQnnOpConfigPackageName(*opConfig, packageName); +} + +inline void setQnnOpConfigTypeName(Qnn_OpConfig_t& opConfig, const char* const typeName) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.typeName = typeName; + } +} + +inline void setQnnOpConfigTypeName(Qnn_OpConfig_t* const opConfig, const char* const typeName) { + setQnnOpConfigTypeName(*opConfig, typeName); +} + +inline void setQnnOpConfigParams( + Qnn_OpConfig_t& opConfig, + uint32_t const numOfParams, + Qnn_Param_t* const params +) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.numOfParams = numOfParams; + opConfig.v1.params = params; + } +} + +inline void setQnnOpConfigParams( + Qnn_OpConfig_t* const opConfig, + uint32_t const numOfParams, + Qnn_Param_t* const params +) { + setQnnOpConfigParams(*opConfig, numOfParams, params); +} + +inline void setQnnOpConfigInputs( + Qnn_OpConfig_t& opConfig, + uint32_t const numOfInputs, + Qnn_Tensor_t* const inputTensors +) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.numOfInputs = numOfInputs; + opConfig.v1.inputTensors = inputTensors; + } +} + +inline void setQnnOpConfigInputs( + Qnn_OpConfig_t* const opConfig, + uint32_t const numOfInputs, + Qnn_Tensor_t* const inputTensors +) { + setQnnOpConfigInputs(*opConfig, numOfInputs, inputTensors); +} + +inline void setQnnOpConfigOutputs( + Qnn_OpConfig_t& opConfig, + uint32_t const numOfOutputs, + Qnn_Tensor_t* const outputTensors +) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.numOfOutputs = numOfOutputs; + opConfig.v1.outputTensors = outputTensors; + } +} + +inline void setQnnOpConfigOutputs( + Qnn_OpConfig_t* const opConfig, + uint32_t const numOfOutputs, + Qnn_Tensor_t* const outputTensors +) { + setQnnOpConfigOutputs(*opConfig, numOfOutputs, outputTensors); +} + +inline Qnn_Tensor_t createQnnTensor(const Qnn_TensorVersion_t version) { + Qnn_Tensor_t tensor = QNN_TENSOR_INIT; + tensor.version = version; + if (version == QNN_TENSOR_VERSION_1) { + tensor.v1 = QNN_TENSOR_V1_INIT; + } else if (version == QNN_TENSOR_VERSION_2) { + tensor.v2 = QNN_TENSOR_V2_INIT; + } + return tensor; +} + +inline uint32_t getQnnTensorId(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.id; +} + +inline uint32_t getQnnTensorId(const Qnn_Tensor_t* const tensor) { + return getQnnTensorId(*tensor); +} + +inline const char* getQnnTensorName(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.name; +} + +inline const char* getQnnTensorName(const Qnn_Tensor_t* const tensor) { + return getQnnTensorName(*tensor); +} + +inline Qnn_TensorType_t getQnnTensorType(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.type; +} + +inline Qnn_TensorType_t getQnnTensorType(const Qnn_Tensor_t* const tensor) { + return getQnnTensorType(*tensor); +} + +inline Qnn_TensorDataFormat_t getQnnTensorDataFormat(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no 
need to check version + return tensor.v1.dataFormat; +} + +inline Qnn_TensorDataFormat_t getQnnTensorDataFormat(const Qnn_Tensor_t* const tensor) { + return getQnnTensorDataFormat(*tensor); +} + +inline Qnn_DataType_t getQnnTensorDataType(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.dataType; +} + +inline Qnn_DataType_t getQnnTensorDataType(const Qnn_Tensor_t* const tensor) { + return getQnnTensorDataType(*tensor); +} + +inline Qnn_QuantizeParams_t getQnnTensorQuantParams(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.quantizeParams; +} + +inline Qnn_QuantizeParams_t getQnnTensorQuantParams(const Qnn_Tensor_t* const tensor) { + if (tensor != nullptr) { + return getQnnTensorQuantParams(*tensor); + } + return QNN_QUANTIZE_PARAMS_INIT; +} + +inline uint32_t getQnnTensorRank(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.rank; +} + +inline uint32_t getQnnTensorRank(const Qnn_Tensor_t* const tensor) { + if (tensor != nullptr) { + return getQnnTensorRank(*tensor); + } + return 0u; +} + +inline uint32_t* getQnnTensorDimensions(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.dimensions; +} + +inline uint32_t* getQnnTensorDimensions(const Qnn_Tensor_t* const tensor) { + return getQnnTensorDimensions(*tensor); +} + +inline uint8_t* getQnnTensorIsDynamicDimensions(const Qnn_Tensor_t& tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return NULL; + } else if (tensor.version == QNN_TENSOR_VERSION_2) { + return tensor.v2.isDynamicDimensions; + } + return NULL; +} + +inline uint8_t* getQnnTensorIsDynamicDimensions(const Qnn_Tensor_t* tensor) { + return getQnnTensorIsDynamicDimensions(*tensor); +} + +inline Qnn_SparseParams_t getQnnTensorSparseParams(const Qnn_Tensor_t& tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return QNN_SPARSE_PARAMS_INIT; + } else if (tensor.version == QNN_TENSOR_VERSION_2) { + return tensor.v2.sparseParams; + } + return QNN_SPARSE_PARAMS_INIT; +} + +inline Qnn_SparseParams_t getQnnTensorSparseParams(const Qnn_Tensor_t* tensor) { + return getQnnTensorSparseParams(*tensor); +} + +inline Qnn_TensorMemType_t getQnnTensorMemType(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.memType; +} + +inline Qnn_TensorMemType_t getQnnTensorMemType(const Qnn_Tensor_t* const tensor) { + return getQnnTensorMemType(*tensor); +} + +inline Qnn_ClientBuffer_t getQnnTensorClientBuf(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.clientBuf; +} + +inline Qnn_ClientBuffer_t getQnnTensorClientBuf(const Qnn_Tensor_t* const tensor) { + return getQnnTensorClientBuf(*tensor); +} + +inline Qnn_MemHandle_t getQnnTensorMemHandle(const Qnn_Tensor_t& tensor) { + // TensorCompatTest justifies no need to check version + return tensor.v1.memHandle; +} + +inline Qnn_MemHandle_t getQnnTensorMemHandle(const Qnn_Tensor_t* const tensor) { + return getQnnTensorMemHandle(*tensor); +} + +inline void setQnnTensorId(Qnn_Tensor_t& tensor, const uint32_t id) { + // TensorCompatTest justifies no need to check version + tensor.v1.id = id; +} + +inline void setQnnTensorId(Qnn_Tensor_t* const tensor, const uint32_t id) { + setQnnTensorId(*tensor, id); +} + +inline void setQnnTensorName(Qnn_Tensor_t& tensor, const char* const name) { + // TensorCompatTest justifies 
no need to check version + tensor.v1.name = name; +} + +inline void setQnnTensorName(Qnn_Tensor_t* const tensor, const char* const name) { + setQnnTensorName(*tensor, name); +} + +inline void setQnnTensorType(Qnn_Tensor_t& tensor, const Qnn_TensorType_t type) { + // TensorCompatTest justifies no need to check version + tensor.v1.type = type; +} + +inline void setQnnTensorType(Qnn_Tensor_t* const tensor, const Qnn_TensorType_t type) { + setQnnTensorType(*tensor, type); +} + +inline void setQnnTensorDataFormat(Qnn_Tensor_t& tensor, const Qnn_TensorDataFormat_t dataFormat) { + // TensorCompatTest justifies no need to check version + tensor.v1.dataFormat = dataFormat; +} + +inline void setQnnTensorDataFormat( + Qnn_Tensor_t* const tensor, + const Qnn_TensorDataFormat_t format +) { + setQnnTensorDataFormat(*tensor, format); +} + +inline void setQnnTensorDataType(Qnn_Tensor_t& tensor, const Qnn_DataType_t dataType) { + // TensorCompatTest justifies no need to check version + tensor.v1.dataType = dataType; +} + +inline void setQnnTensorDataType(Qnn_Tensor_t* const tensor, const Qnn_DataType_t dataType) { + setQnnTensorDataType(*tensor, dataType); +} + +inline void setQnnTensorQuantParams( + Qnn_Tensor_t& tensor, + const Qnn_QuantizeParams_t quantizeParams +) { + // TensorCompatTest justifies no need to check version + tensor.v1.quantizeParams = quantizeParams; +} + +inline void setQnnTensorQuantParams(Qnn_Tensor_t* const tensor, const Qnn_QuantizeParams_t params) { + setQnnTensorQuantParams(*tensor, params); +} + +inline void setQnnTensorRank(Qnn_Tensor_t& tensor, const uint32_t rank) { + // TensorCompatTest justifies no need to check version + tensor.v1.rank = rank; +} + +inline void setQnnTensorRank(Qnn_Tensor_t* const tensor, const uint32_t rank) { + setQnnTensorRank(*tensor, rank); +} + +inline void setQnnTensorDimensions(Qnn_Tensor_t& tensor, uint32_t* const dimensions) { + // TensorCompatTest justifies no need to check version + tensor.v1.dimensions = dimensions; +} + +inline void setQnnTensorDimensions(Qnn_Tensor_t* const tensor, uint32_t* const dimensions) { + setQnnTensorDimensions(*tensor, dimensions); +} + +inline void setQnnTensorIsDynamicDimensions( + Qnn_Tensor_t& tensor, + uint8_t* const isDynamicDimensions +) { + if (tensor.version == QNN_TENSOR_VERSION_2) { + tensor.v2.isDynamicDimensions = isDynamicDimensions; + } +} + +inline void setQnnTensorIsDynamicDimensions( + Qnn_Tensor_t* tensor, + uint8_t* const isDynamicDimensions +) { + setQnnTensorIsDynamicDimensions(*tensor, isDynamicDimensions); +} + +inline void setQnnTensorSparseParams(Qnn_Tensor_t& tensor, const Qnn_SparseParams_t sparseParams) { + if (tensor.version == QNN_TENSOR_VERSION_2) { + tensor.v2.sparseParams = sparseParams; + } +} + +inline void setQnnTensorSparseParams(Qnn_Tensor_t* tensor, Qnn_SparseParams_t sparseParams) { + setQnnTensorSparseParams(*tensor, sparseParams); +} + +inline void setQnnTensorMemType(Qnn_Tensor_t& tensor, const Qnn_TensorMemType_t memType) { + // TensorCompatTest justifies no need to check version + tensor.v1.memType = memType; +} + +inline void setQnnTensorMemType(Qnn_Tensor_t* const tensor, const Qnn_TensorMemType_t memType) { + setQnnTensorMemType(*tensor, memType); +} + +inline void setQnnTensorClientBuf(Qnn_Tensor_t& tensor, const Qnn_ClientBuffer_t clientBuf) { + // TensorCompatTest justifies no need to check version + tensor.v1.clientBuf = clientBuf; +} + +inline void setQnnTensorClientBuf(Qnn_Tensor_t* const tensor, const Qnn_ClientBuffer_t clientBuf) { + 
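    // Editor's note (illustration, not original source): a Qnn_Tensor_t
    // carries either a client buffer or a registered memory handle, selected
    // by memType; keep the two in sync when wiring IO, e.g.:
    //
    //   Qnn_ClientBuffer_t buf{};
    //   buf.data = ioBuffer;          // hypothetical host allocation
    //   buf.dataSize = ioBufferSize;  // hypothetical size in bytes
    //   QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_RAW);
    //   QNN_TENSOR_SET_CLIENT_BUF(tensor, buf);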
setQnnTensorClientBuf(*tensor, clientBuf); +} + +inline void setQnnTensorMemHandle(Qnn_Tensor_t& tensor, const Qnn_MemHandle_t memHandle) { + // TensorCompatTest justifies no need to check version + tensor.v1.memHandle = memHandle; +} + +inline void setQnnTensorMemHandle(Qnn_Tensor_t* const tensor, const Qnn_MemHandle_t handle) { + setQnnTensorMemHandle(*tensor, handle); +} + +inline Qnn_TensorSet_t createQnnTensorSet(const Qnn_TensorSetVersion_t version) { + Qnn_TensorSet_t tensorSet = QNN_TENSOR_SET_INIT; + tensorSet.version = version; + if (version == QNN_TENSOR_SET_VERSION_1) { + tensorSet.v1 = QNN_TENSOR_SET_V1_INIT; + } + return tensorSet; +} + +inline uint32_t getQnnTensorSetNumInputs(const Qnn_TensorSet_t& tensorSet) { + if (tensorSet.version == QNN_TENSOR_SET_VERSION_1) { + return tensorSet.v1.numInputs; + } + return 0; +} + +inline uint32_t getQnnTensorSetNumInputs(const Qnn_TensorSet_t* tensorSet) { + return getQnnTensorSetNumInputs(*tensorSet); +} + +inline Qnn_Tensor_t* getQnnTensorSetInputTensors(const Qnn_TensorSet_t& tensorSet) { + if (tensorSet.version == QNN_TENSOR_SET_VERSION_1) { + return tensorSet.v1.inputs; + } + return 0; +} + +inline Qnn_Tensor_t* getQnnTensorSetInputTensors(const Qnn_TensorSet_t* tensorSet) { + return getQnnTensorSetInputTensors(*tensorSet); +} + +inline uint32_t getQnnTensorSetNumOutputs(const Qnn_TensorSet_t& tensorSet) { + if (tensorSet.version == QNN_TENSOR_SET_VERSION_1) { + return tensorSet.v1.numOutputs; + } + return 0; +} + +inline uint32_t getQnnTensorSetNumOutputs(const Qnn_TensorSet_t* tensorSet) { + return getQnnTensorSetNumOutputs(*tensorSet); +} + +inline Qnn_Tensor_t* getQnnTensorSetOutputTensors(const Qnn_TensorSet_t& tensorSet) { + if (tensorSet.version == QNN_TENSOR_SET_VERSION_1) { + return tensorSet.v1.outputs; + } + return 0; +} + +inline Qnn_Tensor_t* getQnnTensorSetOutputTensors(const Qnn_TensorSet_t* tensorSet) { + return getQnnTensorSetOutputTensors(*tensorSet); +} + +inline void setQnnTensorSetInputTensors( + Qnn_TensorSet_t& tensorSet, + Qnn_Tensor_t* inputTensors, + uint32_t const numInputs +) { + if (tensorSet.version == QNN_TENSOR_SET_VERSION_1) { + tensorSet.v1.inputs = inputTensors; + tensorSet.v1.numInputs = numInputs; + } +} + +inline void setQnnTensorSetInputTensors( + Qnn_TensorSet_t* tensorSet, + Qnn_Tensor_t* inputTensors, + uint32_t const numInputs +) { + setQnnTensorSetInputTensors(*tensorSet, inputTensors, numInputs); +} + +inline void setQnnTensorSetOutputTensors( + Qnn_TensorSet_t& tensorSet, + Qnn_Tensor_t* outputTensors, + const uint32_t numOutputs +) { + if (tensorSet.version == QNN_TENSOR_SET_VERSION_1) { + tensorSet.v1.outputs = outputTensors; + tensorSet.v1.numOutputs = numOutputs; + } +} + +inline void setQnnTensorSetOutputTensors( + Qnn_TensorSet_t* tensorSet, + Qnn_Tensor_t* outputTensors, + const uint32_t numOutputs +) { + setQnnTensorSetOutputTensors(*tensorSet, outputTensors, numOutputs); +} + +// Creator for QNN Op Config +#define QNN_OP_CFG_CREATE(version) createQnnOpConfig(version) + +// Accessors for QNN Op Config +#define QNN_OP_CFG_GET_NAME(opConfig) getQnnOpConfigName(opConfig) +#define QNN_OP_CFG_GET_PACKAGE_NAME(opConfig) getQnnOpConfigPackageName(opConfig) +#define QNN_OP_CFG_GET_TYPE_NAME(opConfig) getQnnOpConfigTypeName(opConfig) +#define QNN_OP_CFG_GET_NUM_PARAMS(opConfig) getQnnOpConfigNumParams(opConfig) +#define QNN_OP_CFG_GET_PARAMS(opConfig) getQnnOpConfigParams(opConfig) +#define QNN_OP_CFG_GET_NUM_INPUTS(opConfig) getQnnOpConfigNumInputs(opConfig) +#define 
QNN_OP_CFG_GET_INPUTS(opConfig) getQnnOpConfigInputs(opConfig) +#define QNN_OP_CFG_GET_NUM_OUTPUTS(opConfig) getQnnOpConfigNumOutputs(opConfig) +#define QNN_OP_CFG_GET_OUTPUTS(opConfig) getQnnOpConfigOutputs(opConfig) + +// Modifiers for QNN Op Config +#define QNN_OP_CFG_SET_NAME(opConfig, value) setQnnOpConfigName(opConfig, value) +#define QNN_OP_CFG_SET_PACKAGE_NAME(opConfig, value) setQnnOpConfigPackageName(opConfig, value) +#define QNN_OP_CFG_SET_TYPE_NAME(opConfig, value) setQnnOpConfigTypeName(opConfig, value) +#define QNN_OP_CFG_SET_PARAMS(opConfig, numOfParams, params) \ + setQnnOpConfigParams(opConfig, numOfParams, params) +#define QNN_OP_CFG_SET_INPUTS(opConfig, numOfInputs, inputTensors) \ + setQnnOpConfigInputs(opConfig, numOfInputs, inputTensors) +#define QNN_OP_CFG_SET_OUTPUTS(opConfig, numOfOutputs, outputTensors) \ + setQnnOpConfigOutputs(opConfig, numOfOutputs, outputTensors) + +// Creator for QNN Tensor +#define QNN_TENSOR_CREATE(version) createQnnTensor(version) + +// Accessors for QNN Tensor +#define QNN_TENSOR_GET_ID(tensor) getQnnTensorId(tensor) +#define QNN_TENSOR_GET_NAME(tensor) getQnnTensorName(tensor) +#define QNN_TENSOR_GET_TYPE(tensor) getQnnTensorType(tensor) +#define QNN_TENSOR_GET_DATA_FORMAT(tensor) getQnnTensorDataFormat(tensor) +#define QNN_TENSOR_GET_DATA_TYPE(tensor) getQnnTensorDataType(tensor) +#define QNN_TENSOR_GET_QUANT_PARAMS(tensor) getQnnTensorQuantParams(tensor) +#define QNN_TENSOR_GET_RANK(tensor) getQnnTensorRank(tensor) +#define QNN_TENSOR_GET_DIMENSIONS(tensor) getQnnTensorDimensions(tensor) +#define QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(tensor) getQnnTensorIsDynamicDimensions(tensor) +#define QNN_TENSOR_GET_SPARSE_PARAMS(tensor) getQnnTensorSparseParams(tensor) +#define QNN_TENSOR_GET_MEM_TYPE(tensor) getQnnTensorMemType(tensor) +#define QNN_TENSOR_GET_CLIENT_BUF(tensor) getQnnTensorClientBuf(tensor) +#define QNN_TENSOR_GET_MEM_HANDLE(tensor) getQnnTensorMemHandle(tensor) + +// Modifiers for QNN Tensor +#define QNN_TENSOR_SET_ID(tensor, value) setQnnTensorId(tensor, value) +#define QNN_TENSOR_SET_NAME(tensor, value) setQnnTensorName(tensor, value) +#define QNN_TENSOR_SET_TYPE(tensor, value) setQnnTensorType(tensor, value) +#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value) setQnnTensorDataFormat(tensor, value) +#define QNN_TENSOR_SET_DATA_TYPE(tensor, value) setQnnTensorDataType(tensor, value) +#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) setQnnTensorQuantParams(tensor, value) +#define QNN_TENSOR_SET_RANK(tensor, value) setQnnTensorRank(tensor, value) +#define QNN_TENSOR_SET_DIMENSIONS(tensor, value) setQnnTensorDimensions(tensor, value) +#define QNN_TENSOR_SET_IS_DYNAMIC_DIMENSIONS(tensor, value) \ + setQnnTensorIsDynamicDimensions(tensor, value) +#define QNN_TENSOR_SET_SPARSE_PARAMS(tensor, value) setQnnTensorSparseParams(tensor, value) +#define QNN_TENSOR_SET_MEM_TYPE(tensor, value) setQnnTensorMemType(tensor, value) +#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) setQnnTensorClientBuf(tensor, value) +#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) setQnnTensorMemHandle(tensor, value) + +// Creator for QNN Tensor Set +#define QNN_TENSORSET_CREATE(version) createQnnTensorSet(version) + +// Accessors for QNN Tensor Set +#define QNN_TENSORSET_GET_NUM_INPUTS(tensorSet) getQnnTensorSetNumInputs(tensorSet) +#define QNN_TENSORSET_GET_INPUT_TENSORS(tensorSet) getQnnTensorSetInputTensors(tensorSet) +#define QNN_TENSORSET_GET_NUM_OUTPUTS(tensorSet) getQnnTensorSetNumOutputs(tensorSet) +#define QNN_TENSORSET_GET_OUTPUT_TENSORS(tensorSet) 
getQnnTensorSetOutputTensors(tensorSet) + +// Modifiers for QNN Tensor Set +#define QNN_TENSORSET_SET_INPUT_TENSORS(tensorSet, inputTensors, numInputs) \ + setQnnTensorSetInputTensors(tensorSet, inputTensors, numInputs) +#define QNN_TENSORSET_SET_OUTPUT_TENSORS(tensorSet, outputTensors, numOutputs) \ + setQnnTensorSetOutputTensors(tensorSet, outputTensors, numOutputs) + +inline bool isQnnTensorV1Compatible(const Qnn_Tensor_t& tensor) { + if (tensor.version == QNN_TENSOR_VERSION_2) { + if (tensor.v2.isDynamicDimensions != NULL) { + return false; + } + + if (tensor.v2.dataFormat == QNN_TENSOR_DATA_FORMAT_SPARSE) { + return false; + } + } + + return true; +} + +inline bool isQnnTensorV1Compatible(const Qnn_Tensor_t* const tensor) { + return isQnnTensorV1Compatible(*tensor); +} + +inline bool isQnnTensorV1Compatible(const Qnn_OpConfig_t& opConfig) { + if ((QNN_OP_CFG_GET_INPUTS(opConfig) != NULL) && (QNN_OP_CFG_GET_NUM_INPUTS(opConfig) > 0u)) { + for (uint32_t tensorIdx = 0u; tensorIdx < QNN_OP_CFG_GET_NUM_INPUTS(opConfig); + tensorIdx++) { + if (!isQnnTensorV1Compatible(QNN_OP_CFG_GET_INPUTS(opConfig)[tensorIdx])) { + return false; + } + } + } + if ((QNN_OP_CFG_GET_OUTPUTS(opConfig) != NULL) && (QNN_OP_CFG_GET_NUM_OUTPUTS(opConfig) > 0u)) { + for (uint32_t tensorIdx = 0u; tensorIdx < QNN_OP_CFG_GET_NUM_OUTPUTS(opConfig); + tensorIdx++) { + if (!isQnnTensorV1Compatible(QNN_OP_CFG_GET_OUTPUTS(opConfig)[tensorIdx])) { + return false; + } + } + } + if ((QNN_OP_CFG_GET_PARAMS(opConfig) != NULL) && (QNN_OP_CFG_GET_NUM_PARAMS(opConfig) > 0)) { + for (uint32_t paramIdx = 0u; paramIdx < QNN_OP_CFG_GET_NUM_PARAMS(opConfig); paramIdx++) { + const Qnn_Param_t& param = QNN_OP_CFG_GET_PARAMS(opConfig)[paramIdx]; + if (QNN_PARAMTYPE_TENSOR == param.paramType) { + if (!isQnnTensorV1Compatible(param.tensorParam)) { + return false; + } + } + } + } + + return true; +} + +inline bool isQnnTensorV1Compatible(const Qnn_OpConfig_t* const opConfig) { + return isQnnTensorV1Compatible(*opConfig); +} diff --git a/Genie/Genie/src/qualla/engines/qnn-api/RpcMem.cpp b/Genie/Genie/src/qualla/engines/qnn-api/RpcMem.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b167620cb80f9b004b0c63236ff689cb88708e36 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/RpcMem.cpp @@ -0,0 +1,481 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include "QnnMem.h" +#include "QnnHtpMem.h" +#include "RpcMem.hpp" +#include "QnnTypeMacros.hpp" +#include "dlwrap.hpp" + +#define RPCMEM_HEAP_ID_SYSTEM 25 +#define RPCMEM_DEFAULT_FLAGS 1 + +#if 1 + #define TRACE_MEMORY_ALLOC QNN_DEBUG +#else + #define TRACE_MEMORY_ALLOC(fmt, ...) +#endif + +RpcMem::RpcMem(Qnn_ContextHandle_t contextHandle, QNN_INTERFACE_VER_TYPE* qnnInterface) + : m_libCdspRpc(nullptr), m_rpcMemAlloc(nullptr), m_rpcMemFree(nullptr), m_rpcMemToFd(nullptr), + m_qnnInterface(qnnInterface), m_contextHandle(contextHandle) { + (void)m_contextHandle; +} + +bool RpcMem::initialize() { + // On Android, 32-bit and 64-bit libcdsprpc.so can be found at /vendor/lib and /vendor/lib64 respectively. 
+ // On Windows, it's installed into something like this + // c:\Windows\System32\DriverStore\FileRepository\qcnspmcdm8380.inf_arm64_30b9cc995571de6a\libcdsprpc.dll +#ifdef _WIN32 + const char* dsprpc_so = "libcdsprpc.dll"; +#else + const char* dsprpc_so = "libcdsprpc.so"; +#endif + + m_libCdspRpc = dlopen(dsprpc_so, RTLD_NOW | RTLD_LOCAL); + if (nullptr == m_libCdspRpc) { + QNN_ERROR("Unable to load backend. dlerror(): %s", dlerror()); + return false; + } + m_rpcMemAlloc = (RpcMemAllocFn_t)dlsym(m_libCdspRpc, "rpcmem_alloc"); + m_rpcMemFree = (RpcMemFreeFn_t)dlsym(m_libCdspRpc, "rpcmem_free"); + m_rpcMemToFd = (RpcMemToFdFn_t)dlsym(m_libCdspRpc, "rpcmem_to_fd"); + if (nullptr == m_rpcMemAlloc || nullptr == m_rpcMemFree || nullptr == m_rpcMemToFd) { + QNN_ERROR("Unable to access symbols in libcdsprpc. dlerror(): %s", dlerror()); + return false; + } + + return true; +} + +RpcMem::~RpcMem() { + if (m_libCdspRpc) { + QNN_DEBUG("Closing libcdsprpc.so handle"); + dlclose(m_libCdspRpc); + } +} + +RpcMemTensorData* RpcMem::getRpcMemTensorData(Qnn_Tensor_t* tensor) { + if (tensor == nullptr) return nullptr; + Qnn_MemHandle_t mem_handle = QNN_TENSOR_GET_MEM_HANDLE(tensor); + if (mem_handle == nullptr) return nullptr; + return &m_memHandleToRpcMem.at(mem_handle); +} + +void* RpcMem::getBuffer(Qnn_Tensor_t* tensor) { + RpcMemTensorData* data = getRpcMemTensorData(tensor); + if (data == nullptr) { + QNN_ERROR("getBuffer : Couldn't find tensor %p", tensor); + return nullptr; + } + return data->memPointer; +} + +int RpcMem::getFd(Qnn_Tensor_t* tensor) { + RpcMemTensorData* data = getRpcMemTensorData(tensor); + if (data == nullptr) { + QNN_ERROR("getFd : Couldn't find tensor %p", tensor); + return -1; + } + return data->fd; +} + +size_t RpcMem::getOffset(Qnn_Tensor_t* tensor) { + RpcMemTensorData* data = getRpcMemTensorData(tensor); + if (data == nullptr) { + QNN_ERROR("getOffset : Couldn't find tensor %p", tensor); + return 0; + } + return data->offset; +} + +size_t RpcMem::getBufferSize(Qnn_Tensor_t* tensor) { + RpcMemTensorData* data = getRpcMemTensorData(tensor); + if (data == nullptr) { + QNN_ERROR("getBufferSize : Couldn't find tensor %p", tensor); + return 0; + } + return data->size; +}; + +size_t RpcMem::getTotalBufferSize(Qnn_Tensor_t* tensor) { + RpcMemTensorData* data = getRpcMemTensorData(tensor); + if (data == nullptr) { + QNN_ERROR("getTotalBufferSize : Couldn't find tensor %p", tensor); + return 0; + } + return data->totalBufferSize; +} + +bool RpcMem::allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) { + if (m_libCdspRpc == nullptr) { + QNN_ERROR("RpcMem not initialized"); + return false; + } + if (!tensor) { + QNN_ERROR("Received nullptr for tensor"); + return false; + } + if (m_tensorToRpcMem.find(tensor) != m_tensorToRpcMem.end()) { + QNN_ERROR("Tensor already allocated"); + return false; + } + + auto memPointer = m_rpcMemAlloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, tensorDataSize); + auto status = true; + if (!memPointer) { + QNN_ERROR("rpcmem_alloc failure"); + status = false; + } + int memfd = -1; + if (status == true) { + memfd = m_rpcMemToFd(memPointer); + if (memfd == -1) { + QNN_ERROR("rpcmem_to_fd failure"); + status = false; + } + } + if (status == true) { + Qnn_MemDescriptor_t memDescriptor = { + {QNN_TENSOR_GET_RANK(tensor), QNN_TENSOR_GET_DIMENSIONS(tensor), nullptr}, + QNN_TENSOR_GET_DATA_TYPE(tensor), + QNN_MEM_TYPE_ION, + {{-1}} + }; + memDescriptor.ionInfo.fd = memfd; + QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_MEMHANDLE); + 
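+        // The mem handle is seeded as null here; on success, memRegister below fills it
+        // in with a backend-owned handle that is stored back on the tensor and later
+        // released via memDeRegister in freeTensorBuffer.
+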
QNN_TENSOR_SET_MEM_HANDLE(tensor, nullptr); + + Qnn_MemHandle_t memHandle = QNN_TENSOR_GET_MEM_HANDLE(tensor); + if (QNN_SUCCESS != m_qnnInterface->memRegister( + m_contextHandle, + &memDescriptor, + 1, + &(memHandle) + )) { + const char* tname = QNN_TENSOR_GET_NAME(tensor); + QNN_ERROR("memRegister fail %s (ctx=%p fd=%d)", tname, m_contextHandle, memfd); + status = false; + } + QNN_TENSOR_SET_MEM_HANDLE(tensor, memHandle); + } + if (status == true) { + m_tensorToRpcMem.insert({tensor, RpcMemTensorData(memfd, memPointer, tensorDataSize)}); + } + if (status == false) { + if (m_rpcMemFree) { + m_rpcMemFree(memPointer); + } + } + return status; +} + +bool RpcMem::freeTensorBuffer(Qnn_Tensor_t* tensor) { + if (!tensor) { + QNN_ERROR("Received nullptr for tensor"); + return false; + } + + if (m_sameMemoryFreeTensors.find(tensor) != m_sameMemoryFreeTensors.end()) { + if (m_tensorToRpcMem.find(tensor) == m_tensorToRpcMem.end()) { + QNN_ERROR("Tensor not found"); + return false; + } + m_tensorToRpcMem.erase(tensor); + } else { + auto memHandle = QNN_TENSOR_GET_MEM_HANDLE(tensor); + if (QNN_SUCCESS != m_qnnInterface->memDeRegister(&memHandle, 1)) { + QNN_ERROR("Failed to deregister ion memory with the backend"); + return false; + } + QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_UNDEFINED); + if (m_tensorToRpcMem.find(tensor) == m_tensorToRpcMem.end()) { + QNN_ERROR("Tensor not found"); + return false; + } + if (m_rpcMemFree) { + m_rpcMemFree(m_tensorToRpcMem[tensor].memPointer); + } + m_tensorToRpcMem.erase(tensor); + } + + return true; +} + +bool RpcMem::useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) { + if (nullptr == dest || nullptr == src) { + QNN_ERROR("Received nullptr"); + return false; + } + if (m_tensorToRpcMem.find(src) == m_tensorToRpcMem.end()) { + QNN_ERROR("Src Tensor not found"); + return false; + } + + if (false == freeTensorBuffer(dest)) { + return false; + } + + QNN_TENSOR_SET_MEM_TYPE(dest, QNN_TENSOR_GET_MEM_TYPE(src)); + QNN_TENSOR_SET_MEM_HANDLE(dest, QNN_TENSOR_GET_MEM_HANDLE(src)); + m_tensorToRpcMem.insert({dest, m_tensorToRpcMem[src]}); + m_sameMemoryFreeTensors.insert(dest); + + return true; +} + +bool RpcMem::useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) { + if (nullptr == dest || nullptr == src) { + QNN_ERROR("Received nullptr"); + return false; + } + if (m_tensorToRpcMem.find(src) == m_tensorToRpcMem.end()) { + QNN_ERROR("Src Tensor not found"); + return false; + } + + if (false == freeTensorBuffer(dest)) { + return false; + } + + QNN_TENSOR_SET_MEM_TYPE(dest, QNN_TENSOR_GET_MEM_TYPE(src)); + QNN_TENSOR_SET_MEM_HANDLE(dest, QNN_TENSOR_GET_MEM_HANDLE(src)); + m_tensorToRpcMem.insert({dest, m_tensorToRpcMem[src]}); + m_sameMemoryFreeTensors.insert(dest); + + return true; +} + +bool RpcMem::useExternalMemory(Qnn_Tensor_t* dest, void* extMem) { + QNN_ERROR("We don't support external memory feature for shared buffers yet!"); + return false; +} + +void* RpcMem::allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) { + *fd = -1; + if (m_libCdspRpc == nullptr) { + QNN_ERROR("RpcMem not initialized for fused buffer"); + return nullptr; + } + + void* memPointer = m_rpcMemAlloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, bufferSize); + if (!memPointer) { + QNN_ERROR("Not able to allocate fused buffer of size: %lu", (unsigned long)bufferSize); + return nullptr; + } + + m_fusedBuffers.push_back({memPointer, bufferSize}); + QNN_DEBUG( + "Successfully allocated fused buffer at %p with size %lu", + memPointer, + (unsigned long)bufferSize + ); + + 
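+    // rpcmem_to_fd exposes the ION allocation as a file descriptor; this fd is what
+    // gets registered with the backend when tensors are mapped into the fused buffer
+    // via mapFusedBufferOffset.
+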
if ((*fd = m_rpcMemToFd(memPointer)) == -1) {
+        QNN_ERROR(
+            "Not able to get fd for the fused buffer of size: %lu", (unsigned long)bufferSize
+        );
+        return nullptr;
+    }
+
+    QNN_DEBUG("Retrieved fd %d for pointer %p", *fd, memPointer);
+    return memPointer;
+}
+
+bool RpcMem::allocateBuffers(
+    const std::map<int, std::vector<std::pair<std::string, size_t>>>& allocs_per_chunk,
+    std::map<std::string, std::pair<int, size_t>>& tensor_offsets
+) {
+    int alloc_chunk_idx = static_cast<int>(m_fusedBuffers.size());
+    int num_alloc_chunks = 0;
+    size_t total_alloc_size = 0;
+
+    for (auto& [_, tensor_sizes] : allocs_per_chunk) {
+        // Calculate total allocation chunk size
+        size_t alloc_chunk_size = 0;
+        for (const auto& [tensor_name, tensor_size] : tensor_sizes) {
+            tensor_offsets[tensor_name] = {alloc_chunk_idx, alloc_chunk_size};
+            alloc_chunk_size += tensor_size;
+        }
+
+        // Allocate chunk for this unique context set
+        if (alloc_chunk_size == 0) {
+            QNN_ERROR("Unexpected chunk size detected. Please re-check IO allocations");
+            return false;
+        }
+
+        m_fusedFds.push_back(0);
+        if (!allocateTensorFusedBuffer(alloc_chunk_size, &m_fusedFds.back()))  //
+            return false;
+        total_alloc_size += alloc_chunk_size;
+        alloc_chunk_idx++;
+        num_alloc_chunks++;
+    }
+    QNN_INFO(
+        "Allocated total size = %lu across %d buffers",
+        (unsigned long)total_alloc_size,
+        num_alloc_chunks
+    );
+    return true;
+}
+
+bool RpcMem::mapFusedBufferOffset(
+    Qnn_Tensor_t* tensor,
+    size_t tensorDataSize,
+    int32_t fd,
+    uint32_t offset,
+    uint64_t totalBufferSize,
+    void* memPointer,
+    Qnn_ContextHandle_t contextHandle
+) {
+    if (m_libCdspRpc == nullptr) {
+        QNN_ERROR("RpcMem not initialized");
+        return false;
+    }
+    if (!tensor) {
+        QNN_ERROR("Received nullptr for tensor");
+        return false;
+    }
+
+    Qnn_ErrorHandle_t ret;
+    const char* tname = QNN_TENSOR_GET_NAME(tensor);
+
+    // Check if tensor already has a memHandle assigned
+    Qnn_MemHandle_t cur_mem_handle = QNN_TENSOR_GET_MEM_HANDLE(tensor);
+    if (cur_mem_handle != nullptr) {
+        // Check if memHandle is already identical to requested buffer and offset
+        RpcMemTensorData& cur_rpc_mem_data = m_memHandleToRpcMem.at(cur_mem_handle);
+        if (cur_rpc_mem_data.fd == fd && cur_rpc_mem_data.offset == offset) {
+            return true;
+        }
+
+        // Updated offset; deregister the previous mem_handle
+        if (tensorDataSize == 0) tensorDataSize = cur_rpc_mem_data.size;
+        // clang-format off
+        TRACE_MEMORY_ALLOC( "memDeRegister %-20s (fd=%d offset=%lu) memHandle=%p",
+                tname, cur_rpc_mem_data.fd, (unsigned long)cur_rpc_mem_data.offset, cur_mem_handle);
+        // clang-format on
+        m_memHandleToRpcMem.erase(cur_mem_handle);
+        if ((ret = m_qnnInterface->memDeRegister(&cur_mem_handle, 1)) != QNN_SUCCESS) {
+            QNN_ERROR(
+                "memDeRegister ERROR(%lu) - %s memHandle=%p",
+                (unsigned long)ret,
+                tname,
+                cur_mem_handle
+            );
+            return false;
+        }
+    } else {
+        // For initial tensors, we need to check if the tensor can re-use a memHandle
+        // from another tensor in the same context
+        auto memConfig = std::make_tuple(fd, offset, contextHandle);
+        if (memConfigList.contains(memConfig)) {
+            auto& parentTensor = memConfigList[memConfig];
+            Qnn_MemHandle_t parentMemHandle = QNN_TENSOR_GET_MEM_HANDLE(parentTensor);
+            QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_MEMHANDLE);
+            QNN_TENSOR_SET_MEM_HANDLE(tensor, parentMemHandle);
+            TRACE_MEMORY_ALLOC("%-20s : Mapping to memHandle %p", tname, parentMemHandle);
+            return true;
+        }
+    }
+
+    // Register a new memHandle based on function arguments
+    QnnMemHtp_Descriptor_t htp_mem_desciptor = {QNN_HTP_MEM_SHARED_BUFFER, totalBufferSize, {0}};
+    htp_mem_desciptor.sharedBufferConfig.fd = fd;
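+    // fd and offset together address a window inside one large rpcmem allocation:
+    // fd identifies the fused buffer and offset selects this tensor's slice within it,
+    // so many tensors can share a single ION buffer.
+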
htp_mem_desciptor.sharedBufferConfig.offset = offset; + + Qnn_MemDescriptor_t mem_descriptor = { + {QNN_TENSOR_GET_RANK(tensor), QNN_TENSOR_GET_DIMENSIONS(tensor), nullptr}, + QNN_TENSOR_GET_DATA_TYPE(tensor), + QNN_MEM_TYPE_CUSTOM, + {{-1}} + }; + mem_descriptor.customInfo = &htp_mem_desciptor; + + Qnn_MemHandle_t mem_handle = nullptr; + ret = m_qnnInterface->memRegister(contextHandle, &mem_descriptor, 1, &mem_handle); + if (ret != QNN_SUCCESS) { + QNN_ERROR("%-20s (ctx=%p fd=%d offset=%u)", tname, contextHandle, fd, offset); + QNN_ERROR("memRegister ERROR(%lu)", (unsigned long)ret); + return false; + } + + // clang-format off + TRACE_MEMORY_ALLOC("%-20s (ctx=%p fd=%d offset=%u) memPointer=%p memHandle=%p", + tname, contextHandle, fd, offset, ((uint8_t*)memPointer) + offset, mem_handle); + // clang-format on + m_memHandleToRpcMem[mem_handle] = RpcMemTensorData( + fd, ((uint8_t*)memPointer) + offset, tensorDataSize, totalBufferSize, offset + ); + + QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_MEMHANDLE); + QNN_TENSOR_SET_MEM_HANDLE(tensor, mem_handle); + if (cur_mem_handle == nullptr) // Cache memory config for initial memRegisters only + memConfigList[std::make_tuple(fd, offset, contextHandle)] = tensor; + + return true; +} + +bool RpcMem::mapFusedBufferOffset( + Qnn_Tensor_t* tensor, + int alloc_idx, + size_t offset, + Qnn_ContextHandle_t ctx, + size_t size +) { + return mapFusedBufferOffset( + tensor, + size, + m_fusedFds[alloc_idx], + offset, + m_fusedBuffers[alloc_idx].second, + m_fusedBuffers[alloc_idx].first, + ctx + ); +} + +bool RpcMem::deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) { + if (!tensor) { + QNN_ERROR("Received nullptr for tensor"); + return false; + } + + if (m_tensorToRpcMem.find(tensor) == m_tensorToRpcMem.end()) { + QNN_ERROR("Tensor not found"); + return false; + } + + // We are not freeing memhandles here since they are already freed when + // freeContext() gets called in the destructor of QnnApi class which + // happens before this point + + // Qnn_MemHandle_t memHandle = QNN_TENSOR_GET_MEM_HANDLE(tensor); + // QNN_ERROR("Interface handle %p memhandle %p", m_qnnInterface, memHandle); + // if (QNN_SUCCESS != m_qnnInterface->memDeRegister(&memHandle, 1)) { + // QNN_ERROR("Failed to deregister ion memory with the backend"); + // return false; + // } + + QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_UNDEFINED); + QNN_TENSOR_SET_MEM_HANDLE(tensor, nullptr); + m_tensorToRpcMem.erase(tensor); + return true; +} + +void RpcMem::freeFusedBuffers() { + // for (auto& memHandle : m_orphanedMemHandles) { + // if (QNN_SUCCESS != m_qnnInterface->memDeRegister(&memHandle, 1)) { + // QNN_ERROR("Failed to deregister ion memory with the backend"); + // } + // } + + for (auto& [mem_ptr, buffer_size] : m_fusedBuffers) { + QNN_DEBUG("Freeing fused buffer %p (size=%lu)", mem_ptr, buffer_size); + m_rpcMemFree(mem_ptr); + } +} diff --git a/Genie/Genie/src/qualla/engines/qnn-api/RpcMem.hpp b/Genie/Genie/src/qualla/engines/qnn-api/RpcMem.hpp new file mode 100644 index 0000000000000000000000000000000000000000..abd772a6bc0ba70fcc0d54afbb1aca2edf3840f9 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-api/RpcMem.hpp @@ -0,0 +1,115 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+//
+//==============================================================================
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "IBufferAlloc.hpp"
+#include "QnnInterface.h"
+#include "Log.hpp"
+
+typedef void* (*RpcMemAllocFn_t)(int, uint32_t, int);
+typedef void (*RpcMemFreeFn_t)(void*);
+typedef int (*RpcMemToFdFn_t)(void*);
+
+struct RpcMemTensorData {
+    int fd;
+    void* memPointer;
+    size_t size;
+    size_t totalBufferSize;
+    size_t offset;
+    RpcMemTensorData() : fd(-1), memPointer(nullptr), size(0), totalBufferSize(0), offset(0) {}
+    RpcMemTensorData(int fdIn, void* memPointerIn, size_t sizeIn)
+        : fd(fdIn), memPointer(memPointerIn), size(sizeIn), totalBufferSize(0), offset(0) {}
+    RpcMemTensorData(
+        int fdIn,
+        void* memPointerIn,
+        size_t sizeIn,
+        size_t totalBufferSizeIn,
+        size_t offsetIn
+    )
+        : fd(fdIn), memPointer(memPointerIn), size(sizeIn), totalBufferSize(totalBufferSizeIn),
+          offset(offsetIn) {}
+};
+
+class RpcMem final : public IBufferAlloc {
+ public:
+    RpcMem(Qnn_ContextHandle_t contextHandle, QNN_INTERFACE_VER_TYPE* qnnInterface);
+    // Disable copy constructors, r-value referencing, etc
+    RpcMem(const RpcMem&) = delete;
+    RpcMem& operator=(const RpcMem&) = delete;
+    RpcMem(RpcMem&&) = delete;
+    RpcMem& operator=(RpcMem&&) = delete;
+    bool initialize() override;
+    void* getBuffer(Qnn_Tensor_t* tensor) override;
+    int getFd(Qnn_Tensor_t* tensor) override;
+
+    size_t getOffset(Qnn_Tensor_t* tensor) override;
+
+    size_t getBufferSize(Qnn_Tensor_t* tensor) override;
+
+    size_t getTotalBufferSize(Qnn_Tensor_t* tensor) override;
+
+    bool allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) override;
+
+    bool freeTensorBuffer(Qnn_Tensor_t* tensor) override;
+    bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) override;
+    bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) override;
+
+    bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) override;
+
+    void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) override;
+    bool allocateBuffers(
+        const std::map<int, std::vector<std::pair<std::string, size_t>>>& allocs_per_chunk,
+        std::map<std::string, std::pair<int, size_t>>& tensor_offsets
+    ) override;
+
+    bool mapFusedBufferOffset(
+        Qnn_Tensor_t* tensor,
+        size_t tensorDataSize,
+        int32_t fd,
+        uint32_t offset,
+        uint64_t totalBufferSize,
+        void* memPointer,
+        Qnn_ContextHandle_t contextHandle
+    ) override;
+    bool deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) override;
+    void freeFusedBuffers() override;
+    bool mapFusedBufferOffset(
+        Qnn_Tensor_t* tensor,
+        int alloc_idx,
+        size_t offset,
+        Qnn_ContextHandle_t ctx,
+        size_t size
+    ) override;
+    virtual ~RpcMem();
+
+ private:
+    RpcMemTensorData* getRpcMemTensorData(Qnn_Tensor_t* tensor);
+
+    // Pointer to the dlopen'd libcdsprpc.so shared library which contains
+    // rpcmem_alloc, rpcmem_free, rpcmem_to_fd APIs
+    void* m_libCdspRpc;
+    // Function pointer to rpcmem_alloc
+    RpcMemAllocFn_t m_rpcMemAlloc;
+    // Function pointer to rpcmem_free
+    RpcMemFreeFn_t m_rpcMemFree;
+    // Function pointer to rpcmem_to_fd
+    RpcMemToFdFn_t m_rpcMemToFd;
+    QNN_INTERFACE_VER_TYPE* m_qnnInterface;
+    Qnn_ContextHandle_t m_contextHandle;
+
+    std::unordered_map<Qnn_Tensor_t*, RpcMemTensorData> m_tensorToRpcMem;
+    std::unordered_set<Qnn_Tensor_t*> m_sameMemoryFreeTensors;
+    std::vector<std::pair<void*, uint64_t>> m_fusedBuffers;  // vector of {basePointer, totalSize}
+    std::vector<int32_t> m_fusedFds;
+    std::unordered_set<Qnn_MemHandle_t> m_orphanedMemHandles;
+    std::unordered_map<Qnn_MemHandle_t, RpcMemTensorData> m_memHandleToRpcMem;
+    std::map<std::tuple<int32_t, uint32_t, Qnn_ContextHandle_t>, Qnn_Tensor_t*> memConfigList;
+};
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/dlwrap.cpp b/Genie/Genie/src/qualla/engines/qnn-api/dlwrap.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..17df4ffa6d2f17d9b488ca7630c50bdb95dd79b6
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/dlwrap.cpp
@@ -0,0 +1,66 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#ifdef _WIN32
+
+    #pragma warning(disable : 4133 4996)
+
+    #include <windows.h>
+
+    #include <stdint.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    #include "dlwrap.hpp"
+
+static const char* last_func;
+static long last_err;
+
+void* dlopen(const char* dll, int flags) {
+    HINSTANCE h = LoadLibraryA(dll);
+    if (h == NULL) {
+        last_err = GetLastError();
+        last_func = "dlopen";
+    }
+
+    return h;
+}
+
+int dlclose(void* h) {
+    if (!FreeLibrary((HINSTANCE)h)) {
+        last_err = GetLastError();
+        last_func = "dlclose";
+        return -1;
+    }
+
+    return 0;
+}
+
+void* dlsym(void* h, const char* name) {
+    FARPROC p = GetProcAddress((HINSTANCE)h, name);
+    if (!p) {
+        last_err = GetLastError();
+        last_func = "dlsym";
+    }
+    return (void*)(intptr_t)p;
+}
+
+const char* dlerror(void) {
+    static char str[88];
+
+    if (!last_err) return NULL;
+
+    sprintf(str, "%s error #%ld", last_func, last_err);
+    last_err = 0;
+    last_func = NULL;
+
+    return str;
+}
+
+#endif // _WIN32
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/dlwrap.hpp b/Genie/Genie/src/qualla/engines/qnn-api/dlwrap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5170c06b5b3e04c493055985399490d9d7e30605
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/dlwrap.hpp
@@ -0,0 +1,33 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#ifndef DLWRAP_HPP
+#define DLWRAP_HPP
+
+#ifndef _WIN32
+
+    // Just include regular dlfcn
+    #include <dlfcn.h>
+
+#else // _WIN32
+
+    // Define basic set of dl functions and flags
+
+    #define RTLD_GLOBAL 0x100
+    #define RTLD_LOCAL 0x000
+    #define RTLD_LAZY 0x000
+    #define RTLD_NOW 0x001
+
+void* dlopen(const char* filename, int flag);
+int dlclose(void* handle);
+void* dlsym(void* handle, const char* name);
+const char* dlerror(void);
+
+#endif // _WIN32
+
+#endif // DLWRAP_HPP
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/qnn-utils.cpp b/Genie/Genie/src/qualla/engines/qnn-api/qnn-utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3d78469c0a21dadf6c106b9b3220386d58a8f8e4
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/qnn-utils.cpp
@@ -0,0 +1,104 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#include "qnn-utils.hpp"
+
+#include <filesystem>
+#include <fstream>
+#include <iomanip>
+#include <sstream>
+#include "QnnApi.hpp"
+#include <fmt/format.h>
+
+namespace fs = std::filesystem;
+
+namespace qualla {
+namespace QnnUtils {
+    // Alternate implementation for bw() = lambda x: (10 * ((x & 0xf0)>>4) + (x & 0xf)) // 8
+    int DataType::bw() { return (_dtype == QNN_DATATYPE_UNDEFINED) ?
-1 : QnnApi::getDataTypeSize(_dtype); }
+    int DataType::type() { return (_dtype == QNN_DATATYPE_UNDEFINED) ? -1 : _dtype >> 4; }
+
+    int32_t DataType::val() { return static_cast<int32_t>(_dtype); }
+
+bool writeRawData(void* data, size_t size, const fs::path& path) {
+    auto p = path.parent_path();
+    if (!fs::exists(p) && !fs::create_directories(p)) return false;
+
+    std::ofstream f(path, std::ofstream::binary);
+    f.write((char*)data, size);
+    f.close();
+
+    return true;
+}
+
+bool readRawData(void* data, size_t size, const fs::path& path) {
+    if (fs::file_size(path) != size) {
+        throw std::runtime_error(fmt::format(
+            "file size does not match: {} size {}, buf-size {}",
+            path.string(),
+            fs::file_size(path),
+            size
+        ));
+    }
+
+    std::ifstream f(path, std::ifstream::binary);
+    f.read((char*)data, size);
+    f.close();
+
+    return true;
+}
+
+void getQuantParamString(
+    const std::vector<QuantParam>& quantParam,
+    std::string& scale_string,
+    std::string& offset_string
+) {
+    std::ostringstream scales_s;
+    std::ostringstream offsets_s;
+    for (size_t i = 0; i < quantParam.size(); i++) {
+        if (i != 0) {
+            scales_s << ", ";
+            offsets_s << ", ";
+        }
+        scales_s << std::fixed << std::setprecision(20) << quantParam[i].scale;
+        offsets_s << quantParam[i].offset;
+    }
+    scale_string = scales_s.str();
+    offset_string = offsets_s.str();
+}
+
+const char* DataType::str() {
+    // clang-format off
+    switch (_dtype) {
+        case QNN_DATATYPE_INT_8:            return "QNN_DATATYPE_INT_8";
+        case QNN_DATATYPE_INT_16:           return "QNN_DATATYPE_INT_16";
+        case QNN_DATATYPE_INT_32:           return "QNN_DATATYPE_INT_32";
+        case QNN_DATATYPE_INT_64:           return "QNN_DATATYPE_INT_64";
+        case QNN_DATATYPE_UINT_8:           return "QNN_DATATYPE_UINT_8";
+        case QNN_DATATYPE_UINT_16:          return "QNN_DATATYPE_UINT_16";
+        case QNN_DATATYPE_UINT_32:          return "QNN_DATATYPE_UINT_32";
+        case QNN_DATATYPE_UINT_64:          return "QNN_DATATYPE_UINT_64";
+        case QNN_DATATYPE_FLOAT_16:         return "QNN_DATATYPE_FLOAT_16";
+        case QNN_DATATYPE_FLOAT_32:         return "QNN_DATATYPE_FLOAT_32";
+        case QNN_DATATYPE_FLOAT_64:         return "QNN_DATATYPE_FLOAT_64";
+        case QNN_DATATYPE_SFIXED_POINT_4:   return "QNN_DATATYPE_SFIXED_POINT_4";
+        case QNN_DATATYPE_SFIXED_POINT_8:   return "QNN_DATATYPE_SFIXED_POINT_8";
+        case QNN_DATATYPE_SFIXED_POINT_16:  return "QNN_DATATYPE_SFIXED_POINT_16";
+        case QNN_DATATYPE_SFIXED_POINT_32:  return "QNN_DATATYPE_SFIXED_POINT_32";
+        case QNN_DATATYPE_UFIXED_POINT_4:   return "QNN_DATATYPE_UFIXED_POINT_4";
+        case QNN_DATATYPE_UFIXED_POINT_8:   return "QNN_DATATYPE_UFIXED_POINT_8";
+        case QNN_DATATYPE_UFIXED_POINT_16:  return "QNN_DATATYPE_UFIXED_POINT_16";
+        case QNN_DATATYPE_UFIXED_POINT_32:  return "QNN_DATATYPE_UFIXED_POINT_32";
+        case QNN_DATATYPE_BOOL_8:           return "QNN_DATATYPE_BOOL_8";
+        case QNN_DATATYPE_STRING:           return "QNN_DATATYPE_STRING";
+        default:                            return "QNN_DATATYPE_UNDEFINED";
+    }
+    // clang-format on
+}
+} // namespace QnnUtils
+} // namespace qualla
diff --git a/Genie/Genie/src/qualla/engines/qnn-api/qnn-utils.hpp b/Genie/Genie/src/qualla/engines/qnn-api/qnn-utils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ca6efa2441cbf8548697412436ed7474ceb65cb6
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-api/qnn-utils.hpp
@@ -0,0 +1,157 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#pragma once
+
+#ifdef _MSC_VER
+    #pragma warning(disable : 4068)
+#endif
+
+#include <algorithm>
+#include <cstdint>
+#include <filesystem>
+#include <map>
+#include <string>
+#include <vector>
+#include "QnnApiUtils.hpp"
+#include "QnnInterface.h"
+
+namespace qualla {
+
+namespace QnnUtils {
+class DataType {
+ private:
+    Qnn_DataType_t _dtype{QNN_DATATYPE_UNDEFINED};
+
+ public:
+    DataType() = default;
+    DataType(const Qnn_Tensor_t* tensor) : _dtype(QNN_TENSOR_GET_DATA_TYPE(tensor)) {}
+    DataType(Qnn_DataType_t dtype) : _dtype(dtype) {}
+
+    // Enable switch and comparisons
+    constexpr operator Qnn_DataType_t() const { return _dtype; }
+
+    int bw();
+    int type();
+
+    int32_t val();
+
+    const char* str();
+};
+
+bool writeRawData(void* tensorData, size_t tensorSize, const std::filesystem::path& path);
+bool readRawData(void* tensorData, size_t tensorSize, const std::filesystem::path& path);
+
+struct Dims {
+    int32_t batch = 1;
+    int32_t height, width, channel, bitWidth;
+    Dims() : height(0), width(0), channel(0), bitWidth(0) {}
+    Dims(int32_t height, int32_t width, int32_t channel, int32_t bitWidth)
+        : height(height), width(width), channel(channel), bitWidth(bitWidth) {}
+    Dims(std::vector<uint32_t>& tDims)
+        : height((int32_t)tDims[1]), width((int32_t)tDims[2]), channel((int32_t)tDims[3]),
+          bitWidth((int32_t)tDims[4]) {
+        // Hack to mix batch dimension
+        if (tDims[0] != 1 && tDims[1] == 1) height = tDims[0];
+        if (tDims[0] > 1 && tDims[1] != 1) batch = tDims[0];
+    }
+    bool operator==(const Dims& rhs) const {
+        return (height == rhs.height) && (width == rhs.width) && (channel == rhs.channel) &&
+               (bitWidth == rhs.bitWidth);
+    }
+    bool operator!=(const Dims& rhs) const { return !(operator==(rhs)); }
+    size_t getNumElements() const { return (size_t)(height * width * channel); }
+    size_t getSize() const { return (size_t)(batch * height * width * channel * bitWidth); }
+    size_t getAlignedSize() const {
+        size_t size = getSize();
+        if ((size & uint64_t{7}) != uint64_t{0}) {
+            size += (uint64_t{8} - (size & uint64_t{7}));
+        }
+        return size;
+    }
+    int32_t getMaxDim() const { return std::max({height, width, channel}); }
+    Dims T() const { return Dims(width, height, channel, bitWidth); }
+};
+
+struct QuantParam {
+    double scale;
+    int32_t offset;
+    QuantParam() : scale(0.0), offset(0) {}
+    QuantParam(double scale_val, int32_t offset_val) : scale(scale_val), offset(offset_val) {}
+};
+
+struct Tensor {
+    Qnn_Tensor_t* tensor = nullptr;
+    Dims dims;
+    std::vector<QuantParam> quantParam;
+    DataType dtype;
+    Tensor() {}
+    Tensor(Qnn_Tensor_t* tensorVal, Dims dimsVal, std::vector<QuantParam> quantParamVec)
+        : tensor(tensorVal), dims(dimsVal), quantParam(quantParamVec),
+          dtype(QNN_TENSOR_GET_DATA_TYPE(tensorVal)) {}
+};
+
+// Maps tensor name to QnnUtils::Tensor
+typedef std::map<std::string, Tensor> TensorMap;
+
+static inline uint8_t sat_round(const uint16_t x) {
+    const uint16_t rounded = x + 0x80;                // add 0.5
+    const uint16_t corrected = std::max(rounded, x);  // catch unsigned wrap around
+    const uint16_t shifted = corrected >> 8;          // divide by 256
+    return static_cast<uint8_t>(shifted);             // to 8-bit
+}
+
+static inline void downcast_u16_to_u8(uint8_t* dest, const uint16_t* src, size_t nmemb) {
+    for (size_t i = 0; i < nmemb; i++)
+        dest[i] = sat_round(src[i]);
+}
+
+template <typename FloatType, typename IntType>
+static inline void quantizeTensorPtr(
+    FloatType* tensor_float,
+    IntType* tensor_quant,
+    int32_t offset,
+    double scale,
+    size_t nmemb
+) {
+#pragma clang loop vectorize(enable) interleave(enable)
+    for (size_t i = 0; i < nmemb; i++) {
+        double val = tensor_float[i];
+        tensor_quant[i] = static_cast<IntType>(val / scale - offset);
+    }
+}
+
+template <typename FloatType, typename IntType>
+static inline void perWidthQuantizeTensorPtr(
+    FloatType* tensor_float,
+    IntType* tensor_quant,
+    std::vector<QuantParam>& quantParam,
+    int32_t height,
+    int32_t width,
+    int32_t channel
+) {
+    for (int32_t h = 0; h < height; h++) {
+        for (int32_t w = 0; w < width; w++) {
+            double scale = quantParam[w].scale;
+            int32_t offset = quantParam[w].offset;
+#pragma clang loop vectorize(enable) interleave(enable)
+            for (int32_t c = 0; c < channel; c++) {
+                int32_t i = (h * width * channel) + (w * channel) + c;
+                double val = tensor_float[i];
+                tensor_quant[i] = static_cast<IntType>(val / scale - offset);
+            }
+        }
+    }
+}
+
+void getQuantParamString(
+    const std::vector<QuantParam>& quantParam,
+    std::string& scale_string,
+    std::string& offset_string
+);
+
+} // namespace QnnUtils
+} // namespace qualla
diff --git a/Genie/Genie/src/qualla/engines/qnn-cpu.cpp b/Genie/Genie/src/qualla/engines/qnn-cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..48b1ab1aa4d820acd7581dbd6b6afa09aed08b83
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-cpu.cpp
@@ -0,0 +1,237 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#include <cstdint>
+#include <cstring>
+
+#include <filesystem>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "cpu-model.hpp"
+
+#define __INFO(__fmt, ...) _env.logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__))
+#define __WARN(__fmt, ...) _env.logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__))
+#define __ERROR(__fmt, ...) _env.logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__))
+#define __KPIS(__fmt, ...) \
+    _env.logger().post(Logger::ENGINE_KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); })
+#define __DEBUG(__fmt, ...) \
+    _env.logger().post(Logger::ENGINE_DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); })
+#define __TRACE(__fmt, ...)
\ + _env.logger().post(Logger::ENGINE_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +class QnnCpuEngine : public Engine { + private: + // Model parameters + std::unique_ptr _model; + + public: + QnnCpuEngine(Context& ctx, const qualla::json& json); + ~QnnCpuEngine(); + + virtual size_t process( + const std::vector& tokens, + std::vector& logits, + bool logits_all + ) override; + + virtual size_t process( + const std::vector& tokens, + const std::vector& attention_map, + std::vector& logits, + bool logits_all + ) override; + + virtual bool updateKV(size_t n_past) override; + virtual bool updateKV(size_t n_past, const std::vector& selected) override; + virtual bool save(const std::string& name) override; + virtual size_t restore(const std::string& name) override; + virtual void reset() override; +}; + +namespace fs = std::filesystem; + +QnnCpuEngine::QnnCpuEngine(Context& ctx, const qualla::json& json) : Engine(ctx, "qnn-cpu", json) { + qualla::Timer start; + + using FF = Feature::Flags; + _features = FF::OUTPUT_LOGITS | FF::SAVE_RESTORE | FF::OUTPUT_EMBEDDINGS; + + __DEBUG("qnn-cpu: init start"); + + qualla::Config conf(json, _type + "-engine:"); + + // Parse config + QnnCpuModel::Params p; + + std::string model_output = conf.optional("model-output", "logits"); + if (model_output == "logits") + p.model_output = QnnCpuModel::ModelOutput::LOGITS; + else if (model_output == "embeddings") + p.model_output = QnnCpuModel::ModelOutput::EMBEDDINGS; + else + throw std::runtime_error( + "Only logits and embeddings outputs are supported. Invalid output supplied : " + + model_output + ); + + p.model_basedir = _env.path().models / conf.optional("model-basedir", ""); + p.model_bin_path = conf.mandatory("model-bin-path"); + p.model = conf.mandatory("model"); + p.op_package = conf.mandatory("op-package"); + p.backend_lib = conf.mandatory("backend-lib"); + p.n_threads = conf.optional("n-threads", 6); + p.n_logits = conf.optional("n_logits", 1); + p.n_layer = conf.optional("n_layer", 32); + p.n_embd = conf.optional("n_embd", 4096); + p.n_heads = conf.optional("n_heads", 32); + p.use_mmap = conf.optional("use-mmap", false); + p.ctx_size = _ctx.size(); + p.n_vocab_size = _ctx.n_vocab(); + + _model = std::make_unique(_env, p); + + // Load model + if (true != _model->initializeModel()) { + throw std::runtime_error("Failure to initialize model"); + } + + // Initialize IO Tensor buffers + if (true != _model->initializeIOTensors()) { + throw std::runtime_error("Error in setting up IO Tensors"); + } + + if (true != _model->validateModel()) { + throw std::runtime_error("Error validating model. 
Please check your I/O"); + } + + __DEBUG("qnn-cpu: model has been validated!"); + + if (true != _model->initializeTensorPointers()) { + throw std::runtime_error("Error : Could not find I/O tensors in loaded graphs"); + } + + _kpis.load.update(start.elapsed_usec()); +}; + +QnnCpuEngine::~QnnCpuEngine() { + __DEBUG("qnn-cpu: fini"); +} + +bool QnnCpuEngine::updateKV(size_t n_past) { + qualla::Timer start; + + if (n_past > _ctx.size()) { + __ERROR("qnn-cpu: context size exceeded : n_past {}", n_past); + State::error("context size exceeded"); + return false; + } + + __DEBUG("qnn-cpu: update-kv start : n_past {}", n_past); + + _model->setKVCacheNPast(n_past); + + __DEBUG("qnn-cpu: update-kv complete : {} usec", start.elapsed_usec()); + + _kpis.update_kv.update(start.elapsed_usec()); + + return true; +} + +bool QnnCpuEngine::updateKV(size_t n_past, const std::vector& selected) { + qualla::Timer start; + + if (n_past > _ctx.size()) { + __ERROR("qnn-cpu: context size exceeded : n_past {}", n_past); + State::error("context size exceeded"); + return false; + } + + __DEBUG("qnn-cpu: update-kv start : n_past {}", n_past); + + _model->setKVCacheNPast(n_past); + + __DEBUG("qnn-cpu: update-kv complete : {} usec", start.elapsed_usec()); + + _kpis.update_kv.update(start.elapsed_usec()); + + return true; +} + +size_t QnnCpuEngine::process( + const std::vector& tokens, + std::vector& logits, + bool logits_all = false +) { + qualla::Timer start; + + __DEBUG("qnn-cpu: inference start: n_tokens {}", tokens.size()); + + _model->runInference(tokens, logits_all); + + __DEBUG("qnn-cpu: inference complete : {} usec", start.elapsed_usec()); + + size_t n_tok; + + { + qualla::Timer t; + + __DEBUG("qnn-cpu: get-logits start: all {}", logits_all); + + n_tok = _model->getDequantLogits(logits, logits_all); + + __DEBUG("qnn-cpu: get-logits complete : {} usec", t.elapsed_usec()); + } + + _kpis.process.update(start.elapsed_usec()); + + return n_tok; +} + +size_t QnnCpuEngine::process( + const std::vector& tokens, + const std::vector& attention_map, + std::vector& logits, + bool logits_all = false +) { + return process( + tokens, + logits, + logits_all + ); +} + +size_t QnnCpuEngine::restore(const std::string& name) { + fs::path cache_path = std::filesystem::path(name) / fmt::format("kv-cache.{}.qnn-cpu", _role); + return _model->loadKVCache(cache_path.string()); +} + +bool QnnCpuEngine::save(const std::string& name) { + fs::path cache_path = std::filesystem::path(name) / fmt::format("kv-cache.{}.qnn-cpu", _role); + return _model->saveKVCache(cache_path.string()); +} + +void QnnCpuEngine::reset() { + // It's enough to just drop the KV$ + updateKV(0); +} + +// Registrator instance +static OnLoad regy([]() { + Engine::__register("qnn-cpu", [](Context& ctx, const json& conf) { + return (Engine*)new QnnCpuEngine(ctx, conf); + }); +}); +void needQnnCpuEngine() {} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-cpu/cpu-model.cpp b/Genie/Genie/src/qualla/engines/qnn-cpu/cpu-model.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e71728021596f2ad9d3d4679b5719f38079000c --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-cpu/cpu-model.cpp @@ -0,0 +1,689 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#include "qualla/env.hpp" +#include "qualla/detail/timer.hpp" +#include "qualla/detail/cache-file.hpp" + +#include "fmt/format.h" +#include "fmt/ranges.h" + +#include "qnn-utils.hpp" +#include "cpu-model.hpp" + +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env.logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env.logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env.logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +QnnCpuModel::QnnCpuModel(Env& env, const Params& params) + : _env(env), model_basedir(params.model_basedir), op_package(params.op_package), + backend_lib(params.backend_lib), model_bin_path(params.model_bin_path), model(params.model), + m_ctx_size(params.ctx_size), m_num_threads(params.n_threads), m_num_tokens(params.ctx_size), + m_numLogits(params.n_logits), m_vocab_size(params.n_vocab_size), m_num_layer(params.n_layer), + m_embd(params.n_embd), m_num_heads(params.n_heads), m_use_mmap(params.use_mmap), + model_output(params.model_output) { + // Initialize QnnAPI + m_qnnApi = std::unique_ptr(new QnnApi()); + m_head_dim = m_embd / m_num_heads; + m_input_dim.push_back(1); + m_input_dim.push_back(m_ctx_size); + // K$, V$ 4D Tensor {n_layer, n_heads, n_ctx, n_head_dim} + m_kv_dim.push_back(m_num_layer); + m_kv_dim.push_back(m_num_heads); + m_kv_dim.push_back(m_ctx_size + 1); + m_kv_dim.push_back(m_head_dim); + if (model_output == ModelOutput::LOGITS) { + m_output_dim.push_back(m_numLogits); + m_output_dim.push_back(m_vocab_size); + } else if (model_output == ModelOutput::EMBEDDINGS) { + m_numLogits = m_ctx_size; + m_output_dim.push_back(m_numLogits); + m_output_dim.push_back(m_embd); + } +} + +QnnCpuModel::~QnnCpuModel() { + // Free Qnn Tensor and their memory + auto start = std::chrono::steady_clock::now(); + if (dequant_logits_ptr != nullptr) free(dequant_logits_ptr); + if (m_ioTensor) { + QNN_DEBUG("Tearing Down Input Tensors Bank"); + for (auto& graph_name : model_order) { + m_ioTensor->tearDownTensors( + m_input_tensors[graph_name], m_input_specs[graph_name].size() + ); + m_ioTensor->tearDownTensors( + m_output_tensors[graph_name], m_output_specs[graph_name].size() + ); + } + } + auto stop = std::chrono::steady_clock::now(); + //QnnUtils::logProfile("Model destruction (cpp) took", start, stop); +} + +// Given a filename, initializeModel load and initializes QNN runtime libraries and the model +bool QnnCpuModel::initializeModel(void) { + // prepare params + Qnn_Param_t params[5]; + params[0].paramType = QNN_PARAMTYPE_SCALAR; + params[0].name = (char*)("model_bin_path"); + params[0].scalarParam.dataType = QNN_DATATYPE_STRING; + params[0].scalarParam.stringValue = model_bin_path.c_str(); + + params[1].paramType = QNN_PARAMTYPE_SCALAR; + params[1].name = (char*)("num_thread"); + params[1].scalarParam.dataType = QNN_DATATYPE_UINT_32; + params[1].scalarParam.uint32Value = m_num_threads; + + params[2].paramType = QNN_PARAMTYPE_SCALAR; 
+ params[2].name = (char*)("num_context"); + params[2].scalarParam.dataType = QNN_DATATYPE_UINT_32; + params[2].scalarParam.uint32Value = m_ctx_size; + + params[3].paramType = QNN_PARAMTYPE_SCALAR; + params[3].name = (char*)("num_last_logits"); + params[3].scalarParam.dataType = QNN_DATATYPE_UINT_32; + params[3].scalarParam.uint32Value = m_numLogits; + + params[4].paramType = QNN_PARAMTYPE_SCALAR; + params[4].name = (char*)("use_mmap"); + params[4].scalarParam.dataType = QNN_DATATYPE_BOOL_8; + params[4].scalarParam.uint32Value = m_use_mmap; + + if (true != m_qnnApi->initialize( + backend_lib, + model, + op_package, + ContextConfigs(), + {}, + m_input_dim.data(), + m_input_dim.size(), + m_output_dim.data(), + m_output_dim.size(), + m_kv_dim.data(), + m_kv_dim.size(), + params, + 5, + false + )) { + QNN_ERROR("Backend library : %s", backend_lib.c_str()); + throw std::runtime_error("QNN initialization failed!"); + } + + // Initialize QNN IO Tensor + m_ioTensor = std::unique_ptr(new IOTensor()); + m_num_graphs = m_qnnApi->getGraphsCount(); + QNN_DEBUG("QNN initialized with %u graph(s)", m_num_graphs); + + auto graphs_info = m_qnnApi->getGraphsInfo(); + for (size_t graph_idx = 0; graph_idx < m_num_graphs; graph_idx++) { + GraphInfo_t* const& graph_info = graphs_info[graph_idx]; + char* graph_name = graph_info->graphName; + std::string graph_str = std::string(graph_name); + + QNN_DEBUG("Loaded graph[%lu] = %s", graph_idx, graph_name); + model_order.push_back(graph_str); + model_context[graph_str] = + m_qnnApi->getContexts()[graph_idx / m_qnnApi->getGraphCountPerContext()]; + } + + // CPU support KV cache mode + m_mode = ExecutionMode::KV_ONLY; + + return true; +} + +// Once the model has been loaded, initialize IO Tensors +// m_ioTensors is initialized by the context for now +bool QnnCpuModel::initializeIOTensors() { + QNN_DEBUG("Create input tensors bank"); + + // Ideally, we should create and initalize m_ioTensor for each context, but we want to + // be able to see/use all the buffers in every contexts so that they can be connected + // with each other. Hence, we are using only the first context to initialize the m_ioTensor + // and use it for all graphs/contexts. 
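+    // Note that the setupInputTensors/setupOutputTensors calls below still receive the
+    // per-graph context handle, so only this initialize() call is pinned to the first
+    // context.
+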
+ if (true != m_ioTensor->initialize(m_qnnApi->getContexts()[0])) { + QNN_ERROR("Failure to initialize IOTensor"); + return false; + } + + // Getting graph info and its count needed for subsequent steps + GraphInfo_t** const& graphsInfo = m_qnnApi->getGraphsInfo(); + + for (size_t graphIdx = 0; graphIdx < m_num_graphs; graphIdx++) { + GraphInfo_t* const& graphInfo = graphsInfo[graphIdx]; + std::string graphName = std::string(graphInfo->graphName); + + // Setup Inputs + { + std::unordered_map inputTensorsSize; + for (size_t tensorIdx = 0; tensorIdx < graphInfo->numInputTensors; tensorIdx++) { + std::string tensor_name; + std::vector tensorDims; + + auto& tensor = graphInfo->inputTensors[tensorIdx]; + m_qnnApi->getTensorNameAndShape(tensor_name, tensorDims, tensor); + std::vector quantParams; + if (!m_qnnApi->getTensorQuantParams(&tensor, quantParams)) { + QNN_DEBUG("Couldn't get tensor quant params : %s", tensor_name.c_str()); + quantParams.emplace_back(0, 0); + } + + auto dims = QnnUtils::Dims(tensorDims); + inputTensorsSize[tensor_name] = dims.getAlignedSize(); + + m_input_specs[graphName][tensor_name] = {&tensor, dims, quantParams}; + } + + Qnn_Tensor_t* tensor_bank = nullptr; + std::unordered_map tensor_ptr_map; + if (true != m_ioTensor->setupInputTensors( + &tensor_bank, + tensor_ptr_map, + *graphInfo, + inputTensorsSize, + m_qnnApi->getContexts()[graphIdx], + false + )) { + QNN_ERROR("Error in setting up Input Tensors for graph %s", graphName.c_str()); + return false; + } + + m_input_tensors[graphName] = tensor_bank; + for (auto& [tensor_name, tensor_ptr] : tensor_ptr_map) { + m_input_specs[graphName][tensor_name].tensor = (Qnn_Tensor_t*)tensor_ptr; + } + } + + // Setup Outputs + { + std::unordered_map outputTensorsSize; + for (size_t tensorIdx = 0; tensorIdx < graphInfo->numOutputTensors; tensorIdx++) { + std::string tensor_name; + std::vector tensorDims; + + auto& tensor = graphInfo->outputTensors[tensorIdx]; + m_qnnApi->getTensorNameAndShape(tensor_name, tensorDims, tensor); + std::vector quantParams; + if (!m_qnnApi->getTensorQuantParams(&tensor, quantParams)) { + QNN_DEBUG("Couldn't get tensor quant params : %s", tensor_name.c_str()); + quantParams.emplace_back(0, 0); + } + + auto dims = QnnUtils::Dims(tensorDims); + outputTensorsSize[tensor_name] = dims.getAlignedSize(); + + m_output_specs[graphName][tensor_name] = {&tensor, dims, quantParams}; + } + + Qnn_Tensor_t* tensor_bank = nullptr; + std::unordered_map tensor_ptr_map; + if (true != m_ioTensor->setupOutputTensors( + &tensor_bank, + tensor_ptr_map, + *graphInfo, + outputTensorsSize, + m_qnnApi->getContexts()[graphIdx], + false + )) { + QNN_ERROR("Error in setting up Output Tensors for graph %s", graphName.c_str()); + return false; + } + + m_output_tensors[graphName] = tensor_bank; + for (auto& [tensor_name, tensor_ptr] : tensor_ptr_map) { + m_output_specs[graphName][tensor_name].tensor = (Qnn_Tensor_t*)tensor_ptr; + } + } + } + +#ifdef DUMP_TENSOR_SPECS + dumpTensorSpecs(); +#endif + + auto stop = std::chrono::steady_clock::now(); + //QnnUtils::logProfile("initializeIoTensors (cpp) took", start, stop); + + return true; +} + +void QnnCpuModel::dumpTensorSpecs() { +#ifdef DEBUG_DUMP_TARGET_PATH + if (true != QnnUtils::CreateDirsIfNotExist(DEBUG_DUMP_TARGET_PATH)) { + throw std::runtime_error( + std::string("Could not create directory : ") + DEBUG_DUMP_TARGET_PATH + ); + } + + static const char* stringFmt = + "\t\t{ \"name\": \"%s\", \"dims\": [1, %d, %d, %d], \"bitwidth\": %d, \"scale\": [%s], \"offset\": [%s] },\n"; + + 
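+    // One spec.<graphName>.json file is emitted per graph; each entry records a tensor's
+    // name, dims, bitwidth, and its quantization scale/offset lists using stringFmt above.
+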
GraphInfo_t** const& graphsInfo = m_qnnApi->getGraphsInfo(); + for (size_t graphIdx = 0; graphIdx < m_num_graphs; graphIdx++) { + GraphInfo_t* const& graphInfo = graphsInfo[graphIdx]; + std::string graphName = std::string(graphInfo->graphName); + + // Create output spec file and open it + char filename[255]; + sprintf(filename, "%s/spec.%s.json", DEBUG_DUMP_TARGET_PATH, graphInfo->graphName); + + FILE* specFile = fopen(filename, "w"); + if (specFile == NULL) { + throw std::runtime_error(std::string("Error opening file : ") + filename); + } + + fprintf(specFile, "{\n\t\"graph_name\" : \"%s\",\n\t\"inputs\" : [\n", graphName.c_str()); + + std::string tensor_name; + std::vector tensorDims; + + for (size_t tensorIdx = 0; tensorIdx < graphInfo->numInputTensors; tensorIdx++) { + auto& tensor = graphInfo->inputTensors[tensorIdx]; + m_qnnApi->getTensorNameAndShape(tensor_name, tensorDims, tensor); + std::string fixed_tensor_name = tensor_name.substr(0, tensor_name.find("_converted")); + QnnUtils::Tensor& spec = m_input_specs[graphName][fixed_tensor_name]; + std::string scales; + std::string offsets; + getQuantParamString(spec.quantParam, scales, offsets); + fprintf(specFile, + stringFmt, + tensor_name.c_str(), + spec.dims.height, + spec.dims.width, + spec.dims.channel, + spec.dims.bitWidth, + scales.c_str(), + offsets.c_str()); + } + + fseek(specFile, -2, SEEK_CUR); // Remove trailing comma + + // Dump out output tensor specs + fprintf(specFile, "\n\t],\n\t\"outputs\" : [\n"); + + for (size_t tensorIdx = 0; tensorIdx < graphInfo->numOutputTensors; tensorIdx++) { + auto& tensor = graphInfo->outputTensors[tensorIdx]; + m_qnnApi->getTensorNameAndShape(tensor_name, tensorDims, tensor); + std::string fixed_tensor_name = tensor_name.substr(0, tensor_name.find("_converted")); + QnnUtils::Tensor& spec = m_output_specs[graphName][fixed_tensor_name]; + std::string scales; + std::string offsets; + getQuantParamString(spec.quantParam, scales, offsets); + fprintf(specFile, + stringFmt, + tensor_name.c_str(), + spec.dims.height, + spec.dims.width, + spec.dims.channel, + spec.dims.bitWidth, + scales.c_str(), + offsets.c_str()); + } + fseek(specFile, -2, SEEK_CUR); // Remove trailing comma + fprintf(specFile, "\n\t]\n}"); + + fclose(specFile); + } +#else + QNN_ERROR( + "Requested dump tensor specs, but DEBUG_DUMP_TARGET_PATH not set. 
Please check nsp-model.h"
+  );
+#endif
+}
+
+template <bool PrintError = true, typename ValType>
+inline bool findTensor(std::unordered_map<std::string, ValType>& map, std::string key) {
+  if (map.find(key) == map.end()) {
+    if constexpr (PrintError == true) QNN_ERROR("Cannot find %s\n", key.c_str());
+    return false;
+  }
+  return true;
+}
+
+template <bool PrintError = true, typename ValType>
+inline ValType* getTensor(std::unordered_map<std::string, ValType>& map, std::string key) {
+  if (map.find(key) == map.end()) {
+    if constexpr (PrintError == true) QNN_ERROR("Cannot find %s\n", key.c_str());
+    return nullptr;
+  }
+  return &map[key];
+}
+
+// Run all validations for the model here so we can exit early
+bool QnnCpuModel::validateModel() {
+  return true;
+}
+
+bool QnnCpuModel::initializeTensorPointers() {
+  auto& input_specs = m_input_specs[model_order.back()];
+  t_input_ids = &input_specs["x0"];
+  t_input_ids_num_token = &input_specs["x1"];
+  t_input_ids_reset_kvcache = &input_specs["x2"];
+  t_input_ids_k_cache = &input_specs["x3"];
+  t_input_ids_v_cache = &input_specs["x4"];
+  t_input_ids_n_past = &input_specs["x5"];
+
+  auto& output_specs = m_output_specs[model_order.back()];
+  t_logits = &output_specs["output_genAI"];
+  t_output_n_past = &output_specs["output_npast"];
+  return true;
+}
+
+void QnnCpuModel::setupInputTensors(const std::vector<uint32_t>& tokens, bool run_bert_mode) {
+  auto start = std::chrono::steady_clock::now();
+
+  size_t num_tokens = m_num_tokens;
+
+  if (tokens.size() > num_tokens) {
+    std::string err_msg = "Called inference with more tokens than model supports: ";
+    err_msg += std::to_string(tokens.size()) + " vs. " + std::to_string(num_tokens);
+    throw std::runtime_error(err_msg);
+  }
+
+  // Grab pointers to buffers for access
+  uint32_t* input_id_buffer = (uint32_t*)getBuffer(t_input_ids);
+  uint32_t* input_id_num_token_buffer = (uint32_t*)getBuffer(t_input_ids_num_token);
+  uint32_t* input_id_reset_kvcache_buffer = (uint32_t*)getBuffer(t_input_ids_reset_kvcache);
+  uint32_t* input_id_n_past_buffer = (uint32_t*)getBuffer(t_input_ids_n_past);
+
+  uint32_t size = 1;
+  for (auto dim : m_input_dim) {
+    size *= dim;
+  }
+
+  std::memset(input_id_buffer, 0, size * sizeof(uint32_t));
+  std::memset(input_id_n_past_buffer, 0, sizeof(uint32_t));
+  std::memset(input_id_num_token_buffer, 0, sizeof(uint32_t));
+  std::memset(input_id_reset_kvcache_buffer, 0, sizeof(uint32_t));
+
+  std::memcpy(input_id_buffer, tokens.data(), tokens.size() * sizeof(uint32_t));
+  *input_id_num_token_buffer = tokens.size();
+  *input_id_n_past_buffer = m_nPast;
+
+  auto stop = std::chrono::steady_clock::now();
+  // QnnUtils::logProfile("setupInputTensors (cpp) took", start, stop);
+}
+
+// Use the QNN API to execute the model
+template <typename T1, typename T2>
+inline bool QnnCpuModel::executeModel(T1& input, T2& output, std::string graph_name) {
+  // With a model instance created and the input tensors populated, run a single
+  // inference on the named graph and collect its outputs.
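+  // Note: the INPUT_DUMP / OUTPUT_DUMP blocks below only dump tensors for the
+  // first five inferences (m_inference_count < 5) to keep debug output bounded.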
+ QNN_DEBUG("Now executing inference for graph %s", graph_name.c_str()); + +#ifdef INPUT_DUMP + if (m_inference_count < 5) dumpTensors(graph_name, true); // Dump input tensors +#endif + + bool ret = m_qnnApi->graphExecute(input, output, graph_name, timeLogs); + + if (ret != true) { + QNN_ERROR("ERROR executing inference: %d for graph %s", ret, graph_name.c_str()); + return false; + } +#ifdef OUTPUT_DUMP + if (m_inference_count < 5) dumpTensors(graph_name, false); // Dump output tensors +#endif + QNN_DEBUG("Execute finished for graph %s", graph_name.c_str()); + + return true; +} + +bool QnnCpuModel::runInferenceHelper( + std::vector& exec_models, + int32_t* wait_time_total, + int32_t* exec_time_total, + bool pipeline_kv_update, + size_t update_size +) { + int32_t exec_time = 0; + int32_t wait_time = 0; + for (auto& graph_name : exec_models) { + { + auto startTime = std::chrono::steady_clock::now(); + if (true != + executeModel(m_input_tensors[graph_name], m_output_tensors[graph_name], graph_name)) + return false; + auto endTime = std::chrono::steady_clock::now(); + exec_time += static_cast( + std::chrono::duration_cast(endTime - startTime) + .count() + ); + } + } + + if (pipeline_kv_update) { + m_nPast += update_size; + } + + *exec_time_total = exec_time; + *wait_time_total = wait_time; + return true; +} + +bool QnnCpuModel::runInference(const std::vector& tokens, bool logits_all) { + __DEBUG("qnn-cpu: run-inference start : n_tokens {}", tokens.size()); + + auto start = std::chrono::steady_clock::now(); + + // Technical note: int32_t can hold upto 596 hours + // Even int16_t should be sufficient here - it holds upto 32.8 seconds + int32_t total_wait_time = 0; + int32_t total_exec_time = 0; + + // Setup inputs for inference + setupInputTensors(tokens, false); + + auto& exec_models = model_order; + if (!runInferenceHelper(exec_models, &total_wait_time, &total_exec_time, false, tokens.size())) + return false; + + prev_run.num_tokens_processed = tokens.size(); + m_inference_count++; + + prev_run.was_bert_mode = false; + prev_run.was_logits_all = logits_all; + + auto stop = std::chrono::steady_clock::now(); + //QnnUtils::logProfile("Run Inference (cpp) took", start, stop); + timeLogs["Run Inference (cpp) "].first += static_cast( + std::chrono::duration_cast(stop - start).count() + ); + timeLogs["Run Inference (cpp) "].second++; + QNN_DEBUG("[TIME] Wait[%d] Exec[%d]\n", total_wait_time, total_exec_time); + return true; +} + +void QnnCpuModel::printFinalLogs() { +#if NSP_LOG_LEVEL > 1 + QNN_DEBUG("Total inference count : %d", m_inference_count); + for (auto& [key, value] : timeLogs) { + QNN_DEBUG("%s : %lf", key.c_str(), value.first / value.second); + } +#endif +} + +bool QnnCpuModel::setKVCacheNPast(size_t n_past) { + if(n_past > m_nPast) { + size_t num_update = n_past - m_nPast; + if (n_past != 0 && num_update > prev_run.num_tokens_processed) { + std::string err_msg = "Requested larger n_past update than #tokens produced by model"; + err_msg += std::to_string(num_update) + " vs. 
" + std::to_string(m_num_tokens); + throw std::runtime_error(err_msg); + } + } + + m_nPast = n_past; + return true; +} + +size_t QnnCpuModel::getDequantLogits(std::vector& dequant_logits, bool logits_all) { + // if model is BERT, always return ALL logits + if (model_output == ModelOutput::EMBEDDINGS) + logits_all = true; + + __DEBUG("qnn-cpu: get-dequant-logits logits_all {}", logits_all); + + auto& logit_spec = m_output_specs[model_order.back()]["output_genAI"]; + float* logitBuf = (float*)getBuffer(logit_spec); + size_t offset = 0; + dequant_logits.clear(); + if (model_output == ModelOutput::LOGITS) { + // if logits_all return [m_numLogits * m_vocab_size] else return [1 * m_vocab_size] + if (!logits_all) { + // Return the last processed token logits i.e. [ ..., [1]] + if (m_numLogits > 1) { + offset = (m_numLogits - 1) * m_vocab_size; + } + } else { + // if m_numLogits > n_tokens_processed, it is left padded, [0, 0, [n_tokens_processed]] + // calculate offset for getting the appropriate logits + if (m_numLogits >= prev_run.num_tokens_processed) { + offset = (m_numLogits - prev_run.num_tokens_processed) * m_vocab_size; + } + } + } +#ifdef DUMP_LOGITS + { + char fname[255]; + sprintf(fname, "%s/logits/%03d", DEBUG_DUMP_TARGET_PATH, m_inference_count); + QnnUtils::writeRawData(getBuffer(logit_spec), getBufferSize(logit_spec), fname); + } +#endif + if (model_output == ModelOutput::LOGITS) { + // logits size = [m_numLogits * m_vocab_size] + // logits might be left padded so, use calculated offset + dequant_logits.reserve((getBufferSize(logit_spec) - (offset * sizeof(float)))); + for (auto i = offset; i < (getBufferSize(logit_spec) / sizeof(float)); ++i) { + dequant_logits.push_back(logitBuf[i]); + } + } else if (model_output == ModelOutput::EMBEDDINGS) { + // embeddings size = [n_tokens_processed * m_embd] + dequant_logits.reserve((prev_run.num_tokens_processed * m_embd * sizeof(float))); + for (auto i = offset; i < ((prev_run.num_tokens_processed * m_embd)); ++i) { + dequant_logits.push_back(logitBuf[i]); + } + } + + return logits_all? 
prev_run.num_tokens_processed : 1; +} + +// TODO: implement save/restore +size_t QnnCpuModel::loadKVCache(const std::string& load_path) { + //TO read the cache file into KV tensor + std::ifstream f(load_path, std::ios::in | std::ios::binary); + if (f.fail()) { + // TODO: replace with proper error handling + __ERROR("qnn-cpu: load-kv errror reading file {}", load_path); + return 0; + } + + CacheFileSpec spec; + f.read((char*)&spec, sizeof(spec)); + if (spec.magic != 0xC0DE) { + __ERROR("qnn-cpu: load-kv expected 0xC0DE found {:#x}", spec.magic); + return 0; + } + // clang-format off + __DEBUG("qnn-cpu: load-kv {{ num_tensors {}, magic {}, dtype {}, n_heads {}, embed_dim {} update_size {} }}", + spec.num_tensors, spec.magic, int(spec.dtype), spec.n_heads, spec.embed_dim, spec.update_size); + // clang-format on + + const int32_t n_valid = static_cast(spec.update_size); + + float* input_id_k_cache_buffer = (float*)getBuffer(t_input_ids_k_cache); + float* input_id_v_cache_buffer = (float*)getBuffer(t_input_ids_v_cache); + + // K$, V$ 4D Tensor {n_layer, n_heads, n_ctx, n_head_dim} + + const size_t copy_size = n_valid * m_head_dim; + const size_t skip_size = (m_ctx_size + 1) * m_head_dim; + + for (int i = 0; i < m_num_layer; i++) { + for(int j = 0; j < m_num_heads; j++) { + f.read((char*)input_id_k_cache_buffer, copy_size * sizeof(float)); + input_id_k_cache_buffer += skip_size; + } + } + + for (int i = 0; i < m_num_layer; i++) { + for(int j = 0; j < m_num_heads; j++) { + f.read((char*)input_id_v_cache_buffer, copy_size * sizeof(float)); + input_id_v_cache_buffer += skip_size; + } + } + + f.close(); + + m_nPast = n_valid; + prev_run.num_tokens_processed = m_nPast; + return spec.update_size; +} + +bool QnnCpuModel::saveKVCache(const std::string& save_path) { + __DEBUG("qnn-cpu: save-kv path {}", save_path); + + std::ofstream f(save_path, std::ios::out | std::ios::binary); + if (f.fail()) { + __ERROR("qnn-cpu: save-kv error opening file : {}", save_path); + throw std::runtime_error("Failed to write to cache file. 
Please re-check path");
+  }
+
+  const uint32_t n_valid = static_cast<uint32_t>(m_nPast);
+  const CacheFileSpec::DataType dtype = CacheFileSpec::DataType::FLOAT32_T;
+
+  // Save the cache file metadata
+  CacheFileSpec spec(m_num_layer * 2, 0xc0de, dtype, 0x0, m_num_heads, m_head_dim, n_valid);
+  f.write((char*)&spec, sizeof(spec));  // as nsp already updated the spec
+  if (n_valid > 0) {
+    // Dump KeyCache and ValueCache
+    float* input_id_k_cache_buffer = (float*)getBuffer(t_input_ids_k_cache);
+    float* input_id_v_cache_buffer = (float*)getBuffer(t_input_ids_v_cache);
+
+    // K$, V$ 4D Tensor {n_layer, n_heads, n_ctx, n_head_dim}
+
+    const size_t copy_size = n_valid * m_head_dim;
+    const size_t skip_size = (m_ctx_size + 1) * m_head_dim;
+    for (int i = 0; i < m_num_layer; i++) {
+      for (int j = 0; j < m_num_heads; j++) {
+        f.write((char*)input_id_k_cache_buffer, copy_size * sizeof(float));
+        input_id_k_cache_buffer += skip_size;
+      }
+    }
+
+    for (int i = 0; i < m_num_layer; i++) {
+      for (int j = 0; j < m_num_heads; j++) {
+        f.write((char*)input_id_v_cache_buffer, copy_size * sizeof(float));
+        input_id_v_cache_buffer += skip_size;
+      }
+    }
+  }
+
+  f.flush();
+  f.close();
+
+  return true;
+}
+
+} // namespace qualla
diff --git a/Genie/Genie/src/qualla/engines/qnn-cpu/cpu-model.hpp b/Genie/Genie/src/qualla/engines/qnn-cpu/cpu-model.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5d6b606acb4e357752df896fe65d8ae4c0afbe26
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-cpu/cpu-model.hpp
@@ -0,0 +1,194 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+// +//============================================================================== + +#ifndef __QUALLA_QNN_CPU_MODEL_H_ +#define __QUALLA_QNN_CPU_MODEL_H_ + +#include +#include +#include +#include +#include +#include + +#include "qualla/env.hpp" + +#include "QnnApi.hpp" +#include "IOTensor.hpp" +#include "qnn-utils.hpp" + +#define LLAMA_MODEL + +namespace qualla { + +class QnnCpuModel { + enum ExecutionMode { AUTODETECT, BERT_KV, KV_ONLY, BERT_ONLY }; + + Env& _env; + + public: + enum ModelOutput { LOGITS = 0x0, EMBEDDINGS= 0x1 }; + + struct Params { + std::filesystem::path model_basedir; + std::string op_package; + std::string backend_lib; + std::string model_bin_path; + std::string model; + ModelOutput model_output; + + bool use_mmap; + uint32_t ctx_size; + uint32_t n_threads; + size_t n_vocab_size; + uint32_t n_logits; + uint32_t n_layer; + uint32_t n_embd; + uint32_t n_heads; + }; + + const std::filesystem::path model_basedir; + std::vector filename_list; + std::vector model_order; + std::vector bert_model_order; + std::vector kv_model_order; + + std::string op_package; + std::string backend_lib; + std::string model_bin_path; + std::string model; + + long long int spill_fill_buffer_size; + + std::unordered_map model_context; + ModelOutput model_output; + std::map> timeLogs; + std::unique_ptr m_qnnApi; + std::unique_ptr m_ioTensor{nullptr}; + + // Model parameters + + size_t m_ctx_size{1024}; + size_t m_num_layer{0}; + size_t m_embd{0}; + size_t m_num_heads{0}; + size_t m_head_dim{0}; + size_t m_num_tokens{0}; + std::string position_id_path_cos; + std::string position_id_path_sin; + int32_t eos_token_id; + int32_t m_num_threads; + int32_t m_numLogits; + size_t m_vocab_size{32000}; //todo:update vocab size from tokenzier + bool m_use_mmap{false}; + std::vector m_kv_dim; + std::vector m_input_dim; + std::vector m_output_dim; + std::vector m_params; + ExecutionMode m_mode{ExecutionMode::AUTODETECT}; + + // Save some information about the last inference run + struct PreviousRunInfo { + bool was_bert_mode; + size_t num_tokens_processed; + bool was_logits_all; + } prev_run{false, 0}; + + // Model specific variables + uint32_t m_num_graphs; + std::unordered_map m_input_tensors; + std::unordered_map> + m_input_specs; + + std::unordered_map m_output_tensors; + std::unordered_map> + m_output_specs; + + // Store some pointers for easier access + QnnUtils::Tensor* t_logits; + QnnUtils::Tensor* t_output_n_past; + QnnUtils::Tensor* t_input_ids; + QnnUtils::Tensor* t_input_ids_num_token; + QnnUtils::Tensor* t_input_ids_reset_kvcache; + QnnUtils::Tensor* t_input_ids_k_cache; + QnnUtils::Tensor* t_input_ids_v_cache; + QnnUtils::Tensor* t_input_ids_n_past; + float* dequant_logits_ptr{nullptr}; + + // Store pointers for bert + QnnUtils::Tensor* b_logits; + QnnUtils::Tensor* b_input_ids; + QnnUtils::Tensor* b_attn_mask; + +#ifdef LLAMA_MODEL + // LLama specific variables + uint16_t position_id_dims; // Derived from model in initializeTensorPointers + // uint16_t position_ids_sin[1024][64]; + // uint16_t position_ids_cos[1024][64]; // RoPE Embedding tensors. 
Loaded from datafile + std::unique_ptr position_ids_sin; // Initialized in load_precomputed_position_ids + std::unique_ptr position_ids_cos; // Initialized in load_precomputed_position_ids + + QnnUtils::Tensor* t_position_ids_sin; + QnnUtils::Tensor* t_position_ids_cos; +#else + QnnUtils::Tensor* t_position_ids; +#endif + + // n_past defines number of population of kvcache + size_t m_nPast{0}; + + // Keep track of inference count + int m_inference_count = 0; + + QnnCpuModel(Env& env, const Params& params); + ~QnnCpuModel(); + + bool initializeModel(void); + bool validateModel(void); + bool initializeIOTensors(void); + bool initializeTensorPointers(); + + void setupInputTensors(const std::vector& tokens, bool run_bert_mode); + + template + inline bool executeModel(T1& input, T2& output, std::string graph_name); + + void dumpTensors(std::string graph_name, bool dump_input); + void dumpTensorSpecs(); + + void printFinalLogs(); + + bool runInference(const std::vector& tokens, bool logits_all); + bool setKVCacheNPast(size_t n_past); + + size_t getDequantLogits(std::vector& logits, bool logits_all = false); + + size_t loadKVCache(const std::string& save_path); + bool saveKVCache(const std::string& load_path); + + private: + bool m_mmap_context_bins = false; // mmap context binary files instead of reading them in memory + // Internal functions to separate different runInference logic + bool runInferenceHelper( + std::vector& exec_models, + int32_t* wait_time_total, + int32_t* exec_time_total, + bool pipeline_kv_update, + size_t update_size + ); + + inline void* getBuffer(QnnUtils::Tensor& spec) { return m_ioTensor->getBuffer(spec.tensor); } + inline void* getBuffer(QnnUtils::Tensor* spec) { return m_ioTensor->getBuffer(spec->tensor); } + inline size_t getBufferSize(QnnUtils::Tensor& spec) { return spec.dims.getSize(); } + inline size_t getBufferSize(QnnUtils::Tensor* spec) { return spec->dims.getSize(); } + // TODO: Seems to be some issue with m_ioTensor->getBufferSize when sharing buffers +}; + +} // namespace qualla + +#endif // __QUALLA_QNN_CPU_MODEL_HPP_ diff --git a/Genie/Genie/src/qualla/engines/qnn-htp.cpp b/Genie/Genie/src/qualla/engines/qnn-htp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5e825d009be6464474ca9c25781a5f7837c4d70a --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp.cpp @@ -0,0 +1,406 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include "qnn-htp.hpp" + +#define __INFO(__fmt, ...) _env.logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env.logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env.logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) 
\ + _env.logger().post(Logger::ENGINE_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +namespace fs = std::filesystem; + +bool NspEngine::load() { + if (_model) return true; + + qualla::Timer start; + + __INFO("qnn-htp: loading model"); + + _model = std::make_unique(_env, _params); + + // Load model + if (true != _model->initializeModel()) { + throw std::runtime_error("Failure to initialize model"); + } + + // Initialize IO Tensor buffers + if (true != _model->initializeIOTensors()) { + throw std::runtime_error("Error in setting up IO Tensors"); + } + + if (true != _model->validateModel()) { + // throw std::runtime_error("Error validating model. Please check your I/O"); + } + + __INFO("qnn-htp: model has been validated!"); + + if (true != _model->initializeKVManager()) { + throw std::runtime_error("Error initializing KVCache managers"); + } + + if (true != _model->initializeTensorPointers()) { + throw std::runtime_error("Error : Could not find I/O tensors in loaded graphs"); + } + + if (true != _model->calculate_rope_embeddings()) { + throw std::runtime_error("Error : Could not load precomputed position ids"); + } + + // Initialize LoRA + if (_model->lora_conf == LoraConfigType::LORA_INPUT_WEIGHT_ENABLE) { + if (true != _model->flushLoraWeightsBuffers()) + throw std::runtime_error("Error : Failed to flush the lora buffers"); + } + + if (true != _model->load_lmhead_weight_as_input()) { + throw std::runtime_error("Error : Could not load lmhead weight input"); + } + + _kpis.load.update(start.elapsed_usec()); + + return true; +} + +bool NspEngine::unload() { + qualla::Timer start; + + __DEBUG("qnn-htp: unloading model"); + _model.reset(nullptr); + + _kpis.unload.update(start.elapsed_usec()); + + return true; +} + +NspEngine::NspEngine(Context& ctx, const qualla::json& json) : Engine(ctx, "qnn-htp", json) { + qualla::Timer start; + + using FF = Feature::Flags; + _features = FF::OUTPUT_LOGITS | FF::SAVE_RESTORE | FF::DYNAMIC_LOAD | FF::OUTPUT_EMBEDDINGS; + + __DEBUG("qnn-htp: init start"); + + qualla::Config conf(json, _type + "-engine:"); + + // Parse config + _params.model_basedir = conf.optional("model-basedir", ""); + if (_params.model_basedir.is_relative()) { + _params.model_basedir = _env.path().models / _params.model_basedir; + _params.model_basedir = _params.model_basedir.make_preferred(); + } + _params.model_list = conf.mandatory>("model-list"); + // Parse model architecture + std::string model_architecture = conf.optional("model-architecture-type", "decoder"); + if (model_architecture == "decoder") + _params.modelArchitectureType = ModelArchitectureType::DECODER; + else if (model_architecture == "encoder") + _params.modelArchitectureType = ModelArchitectureType::ENCODER; + else + throw std::runtime_error( + "Only Encoder and Decoder architectures are supported. 
Invalid architecture supplied : " + + model_architecture + ); + + _params.backend_lib = conf.optional("backend-lib", ""); + _params.backend_ext_conf = conf.optional("backend-ext-conf", ""); + _params.ctx_size = _ctx.size(); + _params.mmap_budget = conf.optional("mmap-budget", 0); + _params.use_mmap = conf.optional("use-mmap", true); + _params.use_async_Init = conf.optional("use-async-Init", true); + _params.spill_fill_bufsize = conf.optional("spill-fill-bufsize", 0); + _params.kv_dim = conf.optional("kv-dim", 128); + _params.n_embd = _ctx.n_embd(); + _params.pad_token = _ctx.pad(); + _params.variant_latency = std::map(); + _params.disable_kv_cache = conf.optional("disable-kv-cache", false); + _params.pooled_output = conf.optional("pooled-output", true); + _params.lmhead_weight_dir = conf.optional("lmhead-weight-dir", ""); + _params.graph_switching = conf.optional("enable-graph-switching", false); + _params.exec_select_graphs = + conf.optional>("execute-select-graphs", {}); + _params.load_select_graphs = conf.optional("load-select-graphs", false); + + qualla::json latencies = conf.optional("latency-map", {}); + for (auto& [variant, latency] : latencies.items()) + _params.variant_latency[std::stoi(variant)] = latency; + _params.kv_update_method = conf.optional( + "kv-update-method", (conf.optional("pos-id-dim", 64) == 40) ? "SHIFT_CONCAT" : "POINTER_SHIFT" + ); + _params.n_threads = conf.optional("n-threads", 4); + if(_params.disable_kv_cache){ + _params.n_threads = 0; + } + _params.poll = conf.optional("poll", false); + + // Positional encodings parameters + if (conf.json.contains("positional-encoding")) { + try { + conf.json["positional-encoding"].get_to(_params.positional_encoding_params); + } catch (const std::runtime_error& e) { + State::fatal(fmt::format("Error in positional-encoding - {}", e.what())); + throw std::runtime_error(State::error()); + } + } else { // For Backward compatibility. May be removed in future releases + // __WARN("Using depracated positional encoding config. Please switch to positional-encoding"); + auto &pos_type = _params.positional_encoding_params; + if(_params.modelArchitectureType == ModelArchitectureType::DECODER) { + pos_type.type = PositionalEncoding::ROPE; + pos_type.rope_params.dims = conf.optional("pos-id-dim", 64); + pos_type.rope_params.dims = conf.optional("pos-id-dims", pos_type.rope_params.dims); + pos_type.rope_params.theta = conf.optional("rope-theta", 10000.0); + pos_type.rope_params.rope_scaling = conf.optional("rope-scaling", RopeScalingParams()); + } + else{ + pos_type.type = PositionalEncoding::ABSOLUTE; + // Other parameters for ENCODER ONLY model doesn't matter. 
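+      // For reference, the deprecated flat keys handled in this fallback are:
+      //   "pos-id-dim" / "pos-id-dims" (RoPE dims), "rope-theta", "rope-scaling".
+      // New configs should use the structured "positional-encoding" object instead
+      // (schema defined by its from_json; e.g. { "type": "rope", ... } - illustrative only).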
+  }
+  }
+  // Default LoRA is Disabled
+  uint8_t lora_version = conf.optional("lora-version", 0);
+  switch (lora_version) {
+    case 0: _params.lora_config_type = LoraConfigType::LORA_DISABLE; break;
+    case 1: _params.lora_config_type = LoraConfigType::LORA_INPUT_WEIGHT_ENABLE; break;
+    case 2: _params.lora_config_type = LoraConfigType::LORA_ADAPTER_WEIGHT_ENABLE; break;
+    default: throw std::runtime_error("Lora Version Undefined."); break;
+  }
+  // LoRA adapter setting
+  qualla::json lora_conf = conf.optional("lora", {});
+  if (lora_conf.size() != 0) {
+    if (lora_conf.is_array()) {
+      for (auto lc : lora_conf) {
+        std::string lnm = lc["adapter-name"];
+        _params.lora_param[lnm].lora_name = lnm;
+        _params.lora_param[lnm].alpha_tensor_name = lc["alpha-tensor-name"];
+        _params.lora_param[lnm].alpha_tensor_val = 0.0f;
+        if (lc.contains("alpha-tensor-value")) {
+          _params.lora_param[lnm].alpha_tensor_val = lc["alpha-tensor-value"];
+        }
+        if (_params.lora_config_type == LoraConfigType::LORA_ADAPTER_WEIGHT_ENABLE) {
+          std::string basedir = "";
+          if (lc.contains("binsection-basedir")) {
+            basedir = lc["binsection-basedir"];
+          }
+          uint32_t n = lc["bin-sections"].size();
+          for (uint32_t i = 0; i < n; i++) {
+            auto binSec = lc["bin-sections"].get<std::vector<std::string>>();
+            fs::path binsection_path = fs::path(binSec[i]);
+            if (binsection_path.is_relative()) binsection_path = basedir / fs::path(binSec[i]);
+            if (!fs::is_regular_file(binsection_path)) {
+              __ERROR("qnn-htp: Can't access Lora binsection adapter : {}",
+                      binsection_path.string());
+              throw std::runtime_error(
+                  "qnn-htp: Can't access adapter file : " + binsection_path.string()
+              );
+            }
+            _params.lora_param[lnm].binsection_list.push_back(binsection_path.string());
+          }
+        }
+        else if (_params.lora_config_type == LoraConfigType::LORA_INPUT_WEIGHT_ENABLE) {
+          _params.lora_param[lnm].path = lc["path"];
+        }
+      }
+    }
+  }
+
+  _params.embedding_length = _ctx.embeddingLength();
+  _params.embedding_datatype = _ctx.embeddingDatatype();
+
+  // cpumask needs to be a string because JSON RFC doesn't allow for hex ints.
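+  // Example: "cpumask": "0xF0" selects cores 4-7 (one bit per core). Base 0 in
+  // std::stoull below auto-detects decimal, octal ("0...") and hex ("0x...") strings.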
+ std::string cpumask = conf.optional("cpumask", "0"); + _params.cpumask = std::stoull(cpumask, nullptr, 0); + + // Debug flags + _params.debug_path = conf.optional("debug-path", "qualla_debug"); + _params.debug_specs = conf.optional("debug-specs", false); + _params.debug_tensors = conf.optional("debug-tensors", false); + _params.debug_outputs = conf.optional("debug-outputs", false); + _params.debug_qnn = conf.optional("debug-qnn", false); + + if (!conf.optional("dynamic-load", false)) { + load(); + } +}; + +NspEngine::~NspEngine() { + unload(); +} + +bool NspEngine::updateKV(size_t n_past) { + return updateKV(n_past, {}); +} + +bool NspEngine::updateKV(size_t n_past, const std::vector& selected) { + if (!_model && !load()) return false; + + qualla::Timer start; + + if (n_past > _ctx.size()) { + __ERROR("qnn-htp: context size exceeded : n_past {}", n_past); + State::error("context size exceeded"); + return false; + } + + if (!_model->setKVCacheNPast(n_past, selected)) { + __ERROR("qnn-htp: Error updating KV$"); + return false; + } + + __DEBUG("qnn-htp: Dispatched KV$ Update (n_past={}) in {} usec", n_past, start.elapsed_usec()); + + _kpis.update_kv.update(start.elapsed_usec()); + + return true; +} + +size_t NspEngine::process( + const std::vector& tokens, + std::vector& logits, + bool logits_all +) { + return process(tokens, {}, logits, logits_all); +} + +size_t NspEngine::process( + const std::vector& tokens, + const std::vector& attention_map, + std::vector& logits, + bool logits_all +) { + if (!_model && !load()) return 0; + + qualla::Timer start; + + size_t n_tok = _model->runInference(tokens, attention_map, logits, logits_all); + if (n_tok == 0) { + State::error("qnn-htp : runInference failed!"); + } + + _kpis.process.update(start.elapsed_usec()); + + return n_tok; +} + +size_t NspEngine::process( + std::vector& embeddings, + const std::vector& attention_map, + std::vector& logits, + bool logits_all +) { + if (!_model && !load()) return 0; + qualla::Timer start; + + __DEBUG("qnn-htp: inference start: n_tokens {}", embeddings.size()); + + size_t n_tok = _model->runInference( + embeddings, attention_map, logits, logits_all + ); + if (n_tok == 0) { + State::error("qnn-htp : runInference failed!"); + } + __DEBUG("qnn-htp: inference complete : {} usec", start.elapsed_usec()); + + _kpis.process.update(start.elapsed_usec()); + + return n_tok; +} + +bool NspEngine::cacheEosEmbedding(std::vector& eosEmbedding) { + if (!_model && !load()) { + return false; + } + return _model->cacheEosEmbedding(eosEmbedding); +}; + +size_t NspEngine::getEmbeddingBufferSize() { + return _model->getEmbeddingBufferSize(); +} + +bool NspEngine::set(qualla::json data) { + bool ret = false; + + if (data.contains("kv-prefix-skip")) { + _model->_size_to_skip_kv_prefix = data["kv-prefix-skip"].get(); + ret = true; + } + + if (data.contains("kv-prefix-offset")) { + _model->_offset_to_apply_kv_prefix = data["kv-prefix-offset"].get(); + ret = true; + } + return ret; +} + +qualla::json NspEngine::get() { + return {{"kv-prefix-skip", _model->_size_to_skip_kv_prefix}, + {"kv-prefix-offset", _model->_offset_to_apply_kv_prefix}}; +} + + +qualla::InputType NspEngine::getInputType(){ + return _model->m_inputType; +} + +size_t NspEngine::restore(const std::string& name) { + if (!_model && !load()) return 0; + + fs::path cache_path = std::filesystem::path(name) / fmt::format("kv-cache.{}.qnn-htp", _role); + return _model->loadKVCache(cache_path.string()); +} + +bool NspEngine::save(const std::string& name) { + if (!_model && 
!load()) return false; + + fs::path cache_path = std::filesystem::path(name) / fmt::format("kv-cache.{}.qnn-htp", _role); + return _model->saveKVCache(cache_path.string()); +} + +void NspEngine::reset() { + if (!_model && !load()) return; + + // It's enough to just drop the KV$ + updateKV(0); +} + +// Registrator instance +static OnLoad regy([]() { + Engine::__register("qnn-htp", [](Context& ctx, const json& conf) { + return (Engine*)new NspEngine(ctx, conf); + }); +}); +void needQnnHtpEngine() {} + +bool NspEngine::applyLoraAdapter(std::string lora_adapter_name) { + + if (!_model) { + __ERROR("qnn-htp: applyLoraAdapter failed model not initialized"); + return false; + } + if (_model->lora_conf == LoraConfigType::LORA_INPUT_WEIGHT_ENABLE) { + return _model->applyLoraWeights(lora_adapter_name); + } + else + return _model->applyLoraAdapter(lora_adapter_name); +} + +bool NspEngine::applyLoraStrength(std::string tensor_name, float tensor_val) { + if (!_model) { + __ERROR("qnn-htp: applyLoraStrength failed model not initialized"); + return false; + } + return _model->applyLoraStrength(tensor_name, tensor_val); +} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-htp.hpp b/Genie/Genie/src/qualla/engines/qnn-htp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4b04bb1911cea93c67b2d6c9831837baee2b9e5e --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp.hpp @@ -0,0 +1,88 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#ifndef __QNN_HTP_H__ +#define __QNN_HTP_H__ + +#include +#include + +#include +#include +#include +#include + +#include + +#include "nsp-model.hpp" + +namespace qualla { + +class NspEngine : public Engine { + protected: + QnnNspModel::Params _params; + + std::unique_ptr _model; + + public: + NspEngine(Context& ctx, const qualla::json& json); + virtual ~NspEngine(); + + virtual size_t process( + const std::vector& tokens, + std::vector& logits, + bool logits_all + ) override; + + virtual size_t process( + const std::vector& tokens, + const std::vector& attention_map, + std::vector& logits, + bool logits_all + ) override; + + virtual size_t process( + std::vector& embeddings, + const std::vector& attention_map, + std::vector& logits, + bool logits_all + ) override; + + /** Stores a precomputed EOS embedding vector. 
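+   *  Forwards to the underlying QnnNspModel, loading the model on demand first
+   *  if dynamic-load deferred initialization.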
*/ + virtual bool cacheEosEmbedding(std::vector& eosEmbedding) override; + + void getInputQuantParam(double& scale, int& offset) { + + auto tmp = _model->t_input_ids->quantParam[0]; + scale = tmp.scale; + offset = tmp.offset; + } + + virtual qualla::InputType getInputType() override; + + virtual size_t getEmbeddingBufferSize() override; + + virtual bool updateKV(size_t n_past) override; + virtual bool updateKV(size_t n_past, const std::vector& selected) override; + virtual bool save(const std::string& name) override; + virtual size_t restore(const std::string& name) override; + virtual void reset() override; + + virtual bool set(qualla::json data) override; + virtual qualla::json get() override; + + virtual bool load() override; + virtual bool unload() override; + + virtual bool applyLoraAdapter(std::string lora_adapter_name) override; + virtual bool applyLoraStrength(std::string tensor_name, float tensor_val) override; +}; + +} // namespace qualla + +#endif diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-graph.cpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-graph.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d0de949c4524338e6c4a3f1af773bbf945593684 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-graph.cpp @@ -0,0 +1,304 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include "qualla/detail/timer.hpp" + +#include "nsp-model.hpp" +#include "nsp-graph.hpp" + +#include + +#include "fmt/format.h" +#include "fmt/ranges.h" + +// Copied from threadpool.cpp +#if defined(_WIN32) + #define NOGDI + #include "windows.h" + +static int sched_yield(void) { + Sleep(0); + return 0; +} +#else + #include +#endif + +#define __INFO(__fmt, ...) _env.logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env.logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env.logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __KVTRACE(__fmt, ...) \ + _env.logger().post(Logger::KVMANAGER_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +namespace qualla { + +// GraphVariant is a self-contained graph. Represents one specific QNN Model +GraphVariant::GraphVariant(GraphInfo_t* g_info, Qnn_ContextHandle_t qnn_ctx, int32_t n_ctx, std::map& layerNames) + : ctx_size(n_ctx), graph_name(g_info->graphName), graph_info(g_info), context_handle(qnn_ctx), m_layerNames(layerNames) { + //TRACE("Parsing %s with ctx_size %d", this->graph_name.c_str(), n_ctx); + + for (bool io : {true, false}) { + uint32_t n_tensors = (io) ? graph_info->numInputTensors : graph_info->numOutputTensors; + auto tensor_wrappers = (io) ? graph_info->inputTensors : graph_info->outputTensors; + auto& tensor_specs = (io) ? 
input_specs : output_specs; + for (size_t tensor_idx = 0; tensor_idx < n_tensors; tensor_idx++) { + + TensorWrapper& tensor = tensor_wrappers[tensor_idx]; + std::string tensor_name = QnnApi::getTensorName(tensor); + + std::vector tensor_dims; + if (!QnnApi::getTensorShape(tensor_dims, tensor)) + throw std::runtime_error("Couldn't get tensor shape : " + tensor_name); + std::vector quantParams; + if (!QnnApi::getTensorQuantParams(&tensor, quantParams)) { + quantParams.emplace_back(0, 0); + } + tensor_specs[tensor_name] = + QnnUtils::Tensor(&tensor, tensor_dims, quantParams); + } + } + + n_tokens = static_cast(determineGraphInputSize()); +} + +// Attempt to determine input size from purely graph IO and context size +// The easiest way is using input_ids. Else, attention_mask/position_ids can also be used +size_t GraphVariant::determineGraphInputSize() { + QnnUtils::Tensor* tensor; + if (m_layerNames[LayerType::INPUT] == "inputs_embeds") { + if (!!(tensor = getInput(m_layerNames[LayerType::ATTN_MASK]))) return tensor->dims.getNumElements() / ctx_size; + } else { + if (!!(tensor = getInput(m_layerNames[LayerType::INPUT]))) return tensor->dims.getNumElements(); + // Use past_key_out tensor to find input size + // The last dimension of past_key_out tensor will always be the input size + for (auto& [tname, qtensor] : output_specs) { + if (!tname.starts_with("past_key")) continue; + return static_cast(qtensor.dims.channel); + } + } + throw std::runtime_error("Unexpected model. Couldn't determine m_num_tokens"); +} + +bool GraphVariant::refreshTensorQuantParams() { + for (bool io : {true, false}) { + uint32_t n_tensors = (io) ? graph_info->numInputTensors : graph_info->numOutputTensors; + auto tensor_wrappers = (io) ? graph_info->inputTensors : graph_info->outputTensors; + auto& tensor_specs = (io) ? 
input_specs : output_specs; + for (size_t tensor_idx = 0; tensor_idx < n_tensors; tensor_idx++) { + + TensorWrapper& tensor = tensor_wrappers[tensor_idx]; + std::string tensor_name = QnnApi::getTensorName(tensor); + std::vector quantParams; + if (!QnnApi::getTensorQuantParams(&tensor, quantParams)) { + quantParams.emplace_back(0, 0); + } + tensor_specs[tensor_name].quantParam = quantParams; + } + } + return true; +} + +QnnNspGraph::QnnNspGraph( + int idx, + Env& env, + int32_t n_ctx, + QnnApi* qnnApi, + IOTensor* ioTensor, + bool threaded +) + : _idx(idx), _env(env), ctx_size(n_ctx), g_qnn_api(qnnApi), g_buffer_mgr(ioTensor), + _threaded(threaded) { + + if (_threaded) { + _lock = new std::mutex(); + _lock_cv = new std::condition_variable(); + } + __DEBUG("qnn-htp: new-NSP-graph : n_ctx {}", n_ctx); +} + +QnnNspGraph::~QnnNspGraph() { + __DEBUG("qnn-htp: del-NSP-graph"); + if (kvmanager != nullptr) delete kvmanager; + if (_threaded) { + delete _lock; + delete _lock_cv; + } +} + +// Parse a loaded GraphInfo_t +bool QnnNspGraph::addGraph(GraphVariant* graph_spec) { + // TRACE("%d", graph_spec->n_tokens); + const int32_t n_tok = graph_spec->n_tokens; + // QNN_DEBUG("Searching for n_tokens=%d count=%lu ctx_size=%d", n_tok, variants.count(n_tok), ctx_size); + if (variants.find(n_tok) != variants.end()) { + printAvailableConfigs(); + __ERROR("qnn-htp: addGraph detected duplicate : {} v {}", n_tok, variants[n_tok]->n_tokens); + throw std::runtime_error("qnn-htp: duplicate graph found, likely overflow occured"); + } + + variants[n_tok] = graph_spec; + return true; +} + +void QnnNspGraph::printAvailableConfigs() { + std::stringstream config_stream; + for (auto& [config, _] : variants) + config_stream << config << ", "; + + __DEBUG("config = [{}]", config_stream.str()); +} + +void QnnNspGraph::dumpTensors(GraphVariant* const variant, bool mode, int n_inference) const { + if (n_inference >= 10) return; + + QnnUtils::TensorMap& tensor_specs = (mode) ? variant->input_specs : variant->output_specs; + std::string prefix = fmt::format("{}/{}/{:03d}", _debug_path, variant->graph_name, n_inference); + for (auto it = tensor_specs.begin(); it != tensor_specs.end(); ++it) { + auto tname = it->first; + auto tspec = it->second; + std::string fname = fmt::format("{}_{}_{}", prefix, (mode) ? 
"in" : "out", tname); + __TRACE("Dumping {} from {:p}", fname, g_buffer_mgr->getBuffer(tspec.tensor)); + QnnUtils::writeRawData(g_buffer_mgr->getBuffer(tspec.tensor), tspec.dims.getSize(), fname); + } +} + +bool QnnNspGraph::registerPointerShift(int32_t variant, int32_t ptr_offset) { + __TRACE("Called QnnNspGraph::registerPointerShift"); + if (_kv_update_method != POINTER_SHIFT) return true; + if (kvmanager->getNumKVTensors() == 0) return true; + qualla::Timer start; + + std::map> allocs; + + qualla::GraphVariant* graph_variant = variants.at(variant); + if (variant == ctx_size) { + // Re-map AR-c model outputs to initial state + for (auto& [tname, tspec] : graph_variant->output_specs) { + if (!tname.starts_with("past_")) continue; // Only process KV$ + auto& [alloc_idx, offset] = tensor_alloc_info->at(tname); + allocs[tname] = {alloc_idx, offset, tspec.dims.getAlignedSize()}; + } + } else { + + // For AR-n models, map input KV$ to appropriate offset + for (auto& [tname, tspec] : graph_variant->input_specs) { + if (!tname.starts_with("past_")) continue; // Only process KV$ + auto out_name = tname.substr(0, tname.rfind("_")).append("_out"); + + auto& [alloc_idx, offset] = tensor_alloc_info->at(out_name); + const bool is_key = tname.starts_with("past_key"); + const int32_t extra_offset = ptr_offset * (is_key ? 1 : kvmanager->_n_embed); + allocs[tname] = {alloc_idx, offset + extra_offset, tspec.dims.getAlignedSize()}; + } + } + + if (!g_buffer_mgr->mapFusedBufferOffset( + graph_variant->graph_info, graph_variant->context_handle, allocs + )) { + __ERROR("Error mapping tensor to allocation buffers"); + return false; + } + + __DEBUG("qnn-htp: pointerShift complete : {} usec", start.elapsed_usec()); + return true; +} + +void QnnNspGraph::registerKVManager(NewNSPKVManager* mgr) { + kvmanager = mgr; + if (mgr->getNumKVTensors() == 0 && _threaded) { + delete _lock; + delete _lock_cv; + _threaded = false; + } + mgr->registerPointerOffsetFn([this](int32_t variant, int32_t ptr_offset) { + return this->registerPointerShift(variant, ptr_offset); + }); +} + +bool QnnNspGraph::execute(int n_tokens, int n_inference, int32_t wait_count) { + GraphVariant* variant = variants.at(n_tokens); // Assume n_tokens exists in variants + run_wait_time = run_exec_time = 0; // Clear out the timer + + qualla::Timer timer; + + waitForLock("QnnNspGraph::execute", wait_count, false); + run_wait_time += timer.elapsed_usec(); + + // Register pointer shift + GraphInfo_t* const graph = variant->graph_info; + + if (_debug_tensors) dumpTensors(variant, true, n_inference); // Dump input tensors + + timer.reset(); // Reset the timer to calculate execution time + std::map> timeLogs; + if (!g_qnn_api->graphExecute( + graph->inputTensors, graph->outputTensors, graph->graphName, timeLogs + )) { + __ERROR("qnn-htp: graph-exec failed for {}", graph->graphName); + return false; + } + + run_exec_time += timer.elapsed_usec(); + + if (_debug_tensors) dumpTensors(variant, false, n_inference); // Dump output tensors + + timer.reset(); + releaseLock("QnnNspGraph::execute"); + run_wait_time += timer.elapsed_usec(); + return true; +} + +void QnnNspGraph::waitForLock(std::string requester) { + if (!_threaded) return; + __KVTRACE("qnn-lock : graph[{}] requested : {}", _idx, requester); + _lock->lock(); + __KVTRACE("qnn-lock : graph[{}] locking : {}", _idx, requester); +} + +void QnnNspGraph::waitForLock(std::string requester, int32_t wait_counter, bool poll) { + if (!_threaded) return; + __KVTRACE("qnn-lock : graph[{}] requested : {} (count={})", 
_idx, requester, wait_counter); + + if (poll) { + _lock->lock(); + // Busy wait until a specific update is complete + while (_counter < wait_counter) { + _lock->unlock(); + sched_yield(); + _lock->lock(); + } + } else { + std::unique_lock lk(*_lock); + _lock_cv->wait(lk, [&] { + __KVTRACE("qnn-lock : graph[{}] trying ({} >= {})", _idx, _counter, wait_counter); + return _counter >= wait_counter; + }); + lk.release(); + } + + __KVTRACE("qnn-lock : graph[{}] locking : {} (count={})", _idx, requester, wait_counter); + return; +} + +void QnnNspGraph::releaseLock(std::string requester) { + if (!_threaded) return; + __KVTRACE("qnn-lock : graph[{}] releasing : {} (count={})", _idx, requester, _counter); + _lock->unlock(); + _lock_cv->notify_one(); +} + +void QnnNspGraph::wakeUpLock() { + if (!_threaded) return; + _lock_cv->notify_one(); +} +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-graph.hpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-graph.hpp new file mode 100644 index 0000000000000000000000000000000000000000..058a66f41bbf0c764aed67b5cd219b34f2146daa --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-graph.hpp @@ -0,0 +1,142 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "qualla/env.hpp" + +#include "QnnApi.hpp" +#include "IOTensor.hpp" +#include "qnn-utils.hpp" +#include "nsp-kvmanager.hpp" + +namespace qualla { +enum class LayerType { + INPUT, + OUTPUT, + ATTN_MASK, + POS_SIN, + POS_COS, + POS_IDS, + TOKEN_TYPE_IDS, + POOL_OUTPUT, + SEQ_OUTPUT +}; +struct GraphVariant { + int32_t n_tokens; + int32_t ctx_size{-1}; + std::string graph_name; + + // QNN API specific variables + GraphInfo_t* graph_info; + Qnn_ContextHandle_t context_handle; + + QnnUtils::TensorMap input_specs; + QnnUtils::TensorMap output_specs; + + std::map& m_layerNames; + + GraphVariant() = delete; + GraphVariant(GraphInfo_t* g_info, Qnn_ContextHandle_t qnn_ctx, int32_t n_ctx, std::map& layerNames); + QnnUtils::Tensor* getTensor(const std::string& tensor_name) { + QnnUtils::Tensor* ret = getInput(tensor_name); + return (ret != nullptr) ? ret : getOutput(tensor_name); + } + QnnUtils::Tensor* getInput(const std::string& tensor_name) { + return input_specs.contains(tensor_name) ? &input_specs.at(tensor_name) : nullptr; + } + QnnUtils::Tensor* getOutput(const std::string& tensor_name) { + return output_specs.contains(tensor_name) ? 
&output_specs.at(tensor_name) : nullptr; + } + + bool refreshTensorQuantParams(); + + private: + size_t determineGraphInputSize(); +}; + +/** + * The idea behind QnnNspGraph is to represent "common" graphs + * For instance, both BERT-mode and KV$-mode are the same graph with different input sizes + * QnnNspGraph will contain and manage both BERT-split-n and KV$mode-split-n + * I/O tensors are mostly shared between these graphs, and can be managed collectively +*/ +class QnnNspGraph { + private: + int _idx; + Env& _env; + + int32_t ctx_size{-1}; + + // Useful pointers for graph execution (managed by NSPModel) + QnnApi* g_qnn_api; + IOTensor* g_buffer_mgr; + + bool _threaded; + std::mutex* _lock; // Locks whenever KV$ is being used or updated + std::condition_variable* _lock_cv; // Wake up _lock when jobs are complete + + KVManagerMode _kv_update_method{POINTER_SHIFT}; + + int32_t run_wait_time, run_exec_time; // Add more stats into a struct + + // Debug mode settings + bool _debug_specs{false}; + bool _debug_tensors{false}; + std::string _debug_path; + + public: + int32_t _counter{-1}; + NewNSPKVManager* kvmanager{nullptr}; + + // TODO: Remove this reference + std::map>* tensor_alloc_info; + + // Keys represent input_id size (1<=input_size<=ctx_size) + // Values are graph description for that input_id size + std::map variants; + + QnnNspGraph( + int idx, + Env& env, + int32_t n_ctx, + QnnApi* qnnApi, + IOTensor* ioTensor, + bool threaded + ); + ~QnnNspGraph(); + + bool addGraph(GraphVariant* graph_spec); + void printAvailableConfigs(); + void registerKVManager(NewNSPKVManager* mgr); + + // Given an input size, picks the correct model among the ones available + // This is likely not easy to implement as there's implications on KV$ management + size_t getOptimalModelInputSize(size_t n_past, size_t input_size) { return 0; } + + GraphVariant* operator[](int32_t idx) { return variants.at(idx); } + + bool execute(int n_tokens, int n_inference, int32_t wait_count); + const std::pair getExecutionStats() { return {run_wait_time, run_exec_time}; } + + void setDebugMode(bool debug_specs, bool debug_tensors, std::string debug_path) { + _debug_path = debug_path; + _debug_specs = debug_specs; + _debug_tensors = debug_tensors; + } + void dumpTensors(GraphVariant* const variant, bool mode, int n_inference) const; + + // Mutex functions + void wakeUpLock(); + void waitForLock(std::string requester = ""); + void waitForLock(std::string requester, int32_t wait_counter, bool poll); + void releaseLock(std::string requester = ""); + bool registerPointerShift(int32_t variant, int32_t ptr_offset); +}; + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvdispatcher.cpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvdispatcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e72d9a0773f4e3c38a041902da1a1ccc705fc444 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvdispatcher.cpp @@ -0,0 +1,319 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include "nsp-kvdispatcher.hpp" + +#include "fmt/format.h" +#include "fmt/ranges.h" + +#define __ERROR(__fmt, ...) _env.logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KVTRACE(__fmt, ...) 
\ + _env.logger().post(Logger::KVMANAGER_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +// Copied from threadpool.cpp +#if defined(_WIN32) + #define NOGDI + #include "windows.h" + +static bool __thread_affinity(uint64_t mask) { + HANDLE h = GetCurrentThread(); + DWORD_PTR m = mask; + + m = SetThreadAffinityMask(h, m); + + return m != 0; +} + +static int sched_yield(void) { + Sleep(0); + return 0; +} + +#elif defined(__APPLE__) +static bool __thread_affinity(uint64_t mask) { + return true; +} + +#else // posix? + #include + #include + #include + +static bool __thread_affinity(uint64_t mask) { + cpu_set_t cpuset; + int32_t err; + + CPU_ZERO(&cpuset); + + for (uint32_t i = 0; i < 64; i++) { + if ((1ULL << i) & mask) { + CPU_SET(i, &cpuset); + } + } + + #ifdef __ANDROID__ + err = sched_setaffinity(0, sizeof(cpuset), &cpuset); + if (err < 0) { + err = errno; + } + #else + err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); + #endif + if (err != 0) { + fprintf(stderr, + "warn: failed to set affinity mask 0x%llx (err %d: %s)\n", + (unsigned long long)mask, + err, + strerror(err)); + return false; + } + + return true; +} + +#endif + +#ifdef _MSC_VER + +static inline void __cpu_relax(void) { + YieldProcessor(); +} + +#else + + #if defined(__aarch64__) + +static inline void __cpu_relax(void) { + __asm__ volatile("yield" ::: "memory"); +} + + #else + +static inline void __cpu_relax(void) { + __asm__ volatile("rep; nop" ::: "memory"); +} + + #endif +#endif + +namespace qualla { + +KVDispatcher::KVDispatcher( + Env& env, + std::vector& graphs, + bool threaded, + uint64_t cpumask +) + : _env(env), _threaded(threaded), _cpumask(cpumask) { + + int32_t idx = 0; + for (QnnNspGraph& graph : graphs) { + if (_threaded) + graph.kvmanager->registerCallback([this](int32_t split) { + return this->workerCallback(split); + }); + + // Initialize new DispatcherState() + bool active = (graph.kvmanager->getNumKVTensors() > 0); + _state.emplace_back(idx, active, false, &graph, KVState(), KVState(), KVState()); + idx++; + } + + if (_threaded) _dispatcher_thread = std::thread(&KVDispatcher::dispatchLoop, this); +} + +KVDispatcher::~KVDispatcher() { + if (_threaded) { + _dispatcher_terminate = true; + _cv.notify_all(); + _dispatcher_thread.join(); + } +} + +int32_t KVDispatcher::process( + int32_t split, + int32_t variant, + int32_t n_past, + const std::vector& selected +) { + DispatcherState& state = _state[split]; + + state.requested.n_past = n_past; + state.requested.variant = variant; + state.requested.selected = selected; + return ++state.requested.counter; +} + +int32_t KVDispatcher::dispatch(int32_t split, int32_t variant, int32_t n_past) { + return dispatch(split, variant, n_past, {}); +} +int32_t KVDispatcher::dispatch( + int32_t split, + int32_t variant, + int32_t n_past, + const std::vector& selected +) { + _variant = variant; + + if (!_threaded) { + if (_state[split].active) + _state[split].graph->kvmanager->dispatchUpdate(n_past, variant, selected); + return 0; + } + + if (!_state[split].active) // Increment current counter and return new value + return _state[split].current.counter = process(split, variant, n_past, selected); + + int32_t updated_idx; + { + std::lock_guard lk(_dispatcher_lock); + updated_idx = process(split, variant, n_past, selected); + _dispatcher_requested = true; + } + + _cv.notify_one(); + return updated_idx; +} + +int32_t KVDispatcher::dispatch(int32_t variant, int32_t n_past) { + return dispatch(variant, n_past, std::vector{}); +} + +int32_t 
KVDispatcher::dispatch(int32_t variant, int32_t n_past, const std::vector& selected) { + _variant = variant; + + if (!_threaded) { + for (auto& s : _state) + if (s.active) s.graph->kvmanager->dispatchUpdate(n_past, variant, selected); + return 0; + } + + int32_t global_updated_idx = -1; + { + std::lock_guard lk(_dispatcher_lock); + + for (auto& s : _state) { + if (!s.active) { + global_updated_idx = + (s.current.counter = process(s.split_idx, variant, n_past, selected)); + continue; + } + + int32_t updated_idx = process(s.split_idx, variant, n_past, selected); + if (global_updated_idx == -1) + global_updated_idx = updated_idx; + else if (global_updated_idx != updated_idx) { + // Something went wrong. States are not in sync + __ERROR("qnn-kv: Dispatcher states out of sync - {} vs {}", + global_updated_idx, + updated_idx); + } + } + _dispatcher_requested = true; + } + + _cv.notify_one(); + return global_updated_idx; +} + +void KVDispatcher::dispatchLoop() { + // if (_cpumask) __thread_affinity(_cpumask); + + //loop dispatch + std::vector dispatch_queue; + dispatch_queue.reserve(_state.size()); + std::unique_lock lk(_dispatcher_lock, std::defer_lock); + + while (true) { + lk.lock(); + _cv.wait(lk, [this] { + return _dispatcher_terminate || _dispatcher_requested || _dispatcher_job_completed; + }); + + // On exit, release all locks + if (_dispatcher_terminate) { + for (auto& s : _state) { + if (s.active && (s.release_lock || s.current.counter != s.queued.counter)) + s.graph->releaseLock("dispatcher_terminate"); + } + lk.unlock(); + break; + } + + __KVTRACE("qnn-kv: Dispatcher ({}, {})", _dispatcher_requested, _dispatcher_job_completed); + + // When a job is complete, release all relevant locks + if (_dispatcher_job_completed) { + for (auto& s : _state) { + if (s.release_lock) { + s.graph->releaseLock("kv-update"); + s.release_lock = false; + } + } + } + + for (auto& s : _state) { + if (!s.active) { + s.current = s.requested; + continue; + } + + auto& current = s.current; + auto& queued = s.queued; + auto& requested = s.requested; + + // There is no new work to be done, OR + // KVManager is already working on a job on this split. Wait for completion. 
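+      // (current = last applied state, queued = job handed to the KVManager,
+      //  requested = newest client request; the counters are monotonically
+      //  increasing ticket numbers. E.g. current==queued==3, requested==4 means
+      //  job #4 still needs dispatching; queued==4, current==3 means it is in flight.)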
+ if (queued.counter == requested.counter || current.counter != queued.counter) continue; + + // Requested change has already been completed + if (current.n_past == requested.n_past && current.variant == requested.variant) { + s.graph->_counter = current.counter = queued.counter = requested.counter; + s.graph->wakeUpLock(); + continue; + } + + // Job has been requested but not yet dispatched + s.queued = s.requested; + dispatch_queue.emplace_back(s.split_idx); + } + + _dispatcher_job_completed = false; // Be ready for next job completion + _dispatcher_requested = false; // Be ready for next job request + + lk.unlock(); + + // Dispatch jobs + for (auto split : dispatch_queue) { + DispatcherState& s = _state[split]; + s.graph->waitForLock("kv-update"); + s.graph->kvmanager->dispatchUpdate( + s.queued.n_past, s.queued.variant, s.queued.selected + ); + } + dispatch_queue.clear(); + } + __KVTRACE("qnn-kv : Dispatcher terminating"); +} + +int32_t KVDispatcher::workerCallback(int32_t split) { + __KVTRACE("qnn-kv : graph[{}] workerCallback()", split); + { + std::lock_guard lk(_dispatcher_lock); + // Update relevant job counters + _state[split].current = _state[split].queued; + _state[split].graph->_counter = _state[split].current.counter; + _state[split].release_lock = true; + _dispatcher_job_completed = true; + } + + _cv.notify_one(); + return _state[split].current.counter; +} + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvdispatcher.hpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvdispatcher.hpp new file mode 100644 index 0000000000000000000000000000000000000000..94601ea60b79bf9c70b570d467a90346f7b9d6e3 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvdispatcher.hpp @@ -0,0 +1,109 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include +#include +#include +#include + +#include "qualla/detail/timer.hpp" +#include "qualla/detail/threadpool.hpp" + +#include "nsp-graph.hpp" +#include "nsp-kvmanager.hpp" + +namespace qualla { + +struct KVState { + int32_t counter; + int32_t n_past; + int32_t variant; + std::vector selected; + KVState() : counter(-1), n_past(-1), variant(-1) {} + KVState(int32_t _counter, int32_t _n_past, int32_t _variant) + : counter(_counter), n_past(_n_past), variant(_variant) {} +}; + +struct DispatcherState { + DispatcherState( + int split_idxVal, + bool activeVal, + bool release_lockVal, + QnnNspGraph* graphVal, + KVState currentVal, + KVState queuedVal, + KVState requestedVal + ) + : split_idx(split_idxVal), active(activeVal), release_lock(release_lockVal), + graph(graphVal), current(currentVal), queued(queuedVal), requested(requestedVal) {} + int split_idx; + bool active; // false means inactive, i.e. 
no KV$ to update + bool release_lock; // Set to true when job is complete so we can release the lock + QnnNspGraph* graph; + KVState current; + KVState queued; + KVState requested; +}; + +class KVDispatcher { + private: + Env& _env; + bool _threaded; + bool _poll; // Currently unused + uint64_t _cpumask{0}; + + int32_t _variant{-1}; + + std::vector _state; + + std::thread _dispatcher_thread; + bool _dispatcher_terminate{false}; + bool _dispatcher_requested{false}; + bool _dispatcher_job_completed{false}; + std::mutex _dispatcher_lock; + + std::condition_variable _cv; + + // Function to add jobs to the dispatcher + // @param split Determines which split to update + // @param variant Variant of the model to use for updating + // @param n_past Number of past updates to include in the update + // returns New counter + int32_t process( + int32_t split, + int32_t variant, + int32_t n_past, + const std::vector& selected + ); + + public: + KVDispatcher(Env& env, std::vector& graphs, bool threaded, uint64_t cpumask); + ~KVDispatcher(); + + // dispatch for all splits + int32_t dispatch(int32_t variant, int32_t n_past); + int32_t dispatch(int32_t variant, int32_t n_past, const std::vector& selected); + int32_t dispatch(int32_t split, int32_t variant, int32_t n_past); + int32_t dispatch( + int32_t split, + int32_t variant, + int32_t n_past, + const std::vector& selected + ); + + // Callback function for worker thread to mark update job has been completed + int32_t workerCallback(int32_t split); + + void dispatchLoop(); + + void setVariant(int32_t variant) { _variant = variant; } + int32_t getCurVariant() { return _variant; }; +}; +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvmanager.cpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvmanager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7046ba06c94b1c3e12233b496da152c8ae1366b4 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvmanager.cpp @@ -0,0 +1,558 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include "qualla/detail/timer.hpp" +#include "qualla/detail/threadpool.hpp" + +#include "nsp-kvmanager.hpp" + +#include "fmt/format.h" +#include "fmt/ranges.h" + +// Copied from threadpool.cpp +#if defined(_WIN32) + #include "windows.h" + +static int sched_yield(void) { + Sleep(0); + return 0; +} +#else + #include +#endif + +#define __ERROR(__fmt, ...) _env.logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __TRACE(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __KVTRACE(__fmt, ...) \ + _env.logger().post(Logger::KVMANAGER_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +NewNSPKVManager::NewNSPKVManager( + int idx, + Env& env, + ThreadPool* threadpool, + IOTensor* buffer_mgr, + QnnUtils::TensorMap& tensor_specs, + int32_t ctx_size, + int32_t embed_dim, + KVManagerMode mode +) + : _env(env), _mgr_idx(idx), _mode(mode), _n_embed(embed_dim), _n_ctx(ctx_size) { + // Parse KV$ Tensor names here - supports past_{key,value}_{layer_idx}[_h{head_idx}]_{in,out} + // TODO: Enforce tensor order during allocation as well to speed up cache loops(?) 
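+  // The packed key (layer_idx << 16) | head_idx keeps std::map iteration
+  // layer-major and head-minor, e.g. layer 3 / head 2 sorts as 0x00030002.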
std::map<uint32_t, QnnUtils::Tensor*> key_tensors, value_tensors;
+  for (auto& [tname, tensor] : tensor_specs) {
+    auto [tensor_type, layer_idx, head_idx] = parseKVTensorName(tname);
+    if (tensor_type == 0) continue;
+    if (tensor_type == 1)
+      key_tensors[layer_idx << 16 | head_idx] = &tensor;
+    else
+      value_tensors[layer_idx << 16 | head_idx] = &tensor;
+  }
+
+  if (key_tensors.size() + value_tensors.size() == 0) return;
+
+  // Calculate datatype - bitwidth and float vs quantized
+  auto rt = key_tensors.size() == 0 ? value_tensors.begin()->second : key_tensors.begin()->second;
+  _bw = rt->dtype.bw(); // Assume same bitwidth for all tensors
+  if (rt->quantParam[0].offset == 0 && rt->quantParam[0].scale == 0)
+    _pad_value = 0; // For floating point inputs, pad value is 0
+  else // Currently only 8-bit quantization is supported. Will need to change to support 16-bit
+    _pad_value = static_cast<uint8_t>(-rt->quantParam[0].offset);
+
+  // clang-format off
+  __TRACE( "qnn-kv : {} KVManager[{} Key$ + {} Value$] : {}-bit KV$ n_embed={} n_ctx={} mode={}",
+           _mgr_idx, key_tensors.size(), value_tensors.size(), _bw*8, _n_embed, _n_ctx,
+           (_mode==POINTER_SHIFT ? "POINTER_SHIFT" : "SHIFT_CONCAT")
+  );
+  // clang-format on
+
+  _kv_cache.reserve(key_tensors.size() + value_tensors.size());
+  for (auto& [_, tensor] : key_tensors) {
+    void* buffer = buffer_mgr->getBuffer(tensor->tensor);
+    _kv_cache.emplace_back(true, (char*)buffer, (char*)buffer, tensor->dims.height);
+    _key_scales.push_back(tensor->quantParam[0].scale);
+  }
+
+  for (auto& [_, tensor] : value_tensors) {
+    void* buffer = buffer_mgr->getBuffer(tensor->tensor);
+    _kv_cache.emplace_back(false, (char*)buffer, (char*)buffer, tensor->dims.height);
+    _value_scales.push_back(tensor->quantParam[0].scale);
+  }
+
+  // Calculate _max_n_heads
+  for (auto& cache : _kv_cache)
+    _max_n_heads = cache.n_heads > _max_n_heads ? cache.n_heads : _max_n_heads;
+
+  // clang-format off
+  __TRACE( "qnn-kv : {} KVManager[{} Key$ + {} Value$] : n_heads<={} n_embed={} n_ctx={} mode={}",
+           _mgr_idx, key_tensors.size(), value_tensors.size(), _max_n_heads, _n_embed, _n_ctx,
+           (_mode==POINTER_SHIFT ? "POINTER_SHIFT" : "SHIFT_CONCAT")
+  );
+  // clang-format on
+
+  if (threadpool != nullptr && threadpool->size() > 0) {
+    _threadpool = threadpool;
+    n_threads = threadpool->size();
+    _sync = 0;
+
+    _update_jobs.reserve(n_threads + 1);
+    if (_mode == POINTER_SHIFT)
+      _update_jobs.push_back([this] { this->registerPointerOffset(); });
+
+    for (int idx = 0; idx < n_threads; idx++)
+      _update_jobs.push_back([this, idx] { this->runKVUpdateJob(idx); });
+  }
+
+  _callback_fn = [](int32_t a) { return 0; };
+}
+
+NewNSPKVManager::~NewNSPKVManager() {}
+
+// Parse KV$ Tensor names here - supports past_{key,value}_{layer_idx}[_h{head_idx}]_{in,out}
+std::tuple<int, uint16_t, uint16_t> NewNSPKVManager::parseKVTensorName(std::string name) {
+  if (!name.starts_with("past_")) return {0, 0, 0};
+
+  const bool is_key = name.starts_with("past_key");
+  const size_t pos0 = (is_key) ? 9 : 11; // "past_key_" OR "past_value_"
+  const size_t pos1 = name.find('_', pos0);
+  const size_t pos2 = name.find('_', pos1 + 2);
+
+  uint16_t layer_idx = 0, head_idx = 0;
+  layer_idx = static_cast<uint16_t>(std::stoi(name.substr(pos0, pos1 - pos0)));
+  if (pos2 != std::string::npos)
+    head_idx = static_cast<uint16_t>(std::stoi(name.substr(pos1 + 2, pos2 - pos1 - 2)));
+
+  return std::make_tuple(is_key ?
1 : 2, layer_idx, head_idx); +} + +// Switch key cache from AR-m to AR-n (relative to ctx_size) +bool NewNSPKVManager::switchKeyVariant(KVCache cache, int32_t m, int32_t n, int32_t offset) { + const size_t in_cache_dim = (m == _n_ctx) ? _n_ctx : _n_ctx - m; + const size_t out_cache_dim = _n_ctx - n; + const size_t n_heads = cache.n_heads; + + const size_t read_row_size = in_cache_dim * _bw; + const size_t write_row_size = out_cache_dim * _bw; + const size_t offset_size = offset * _bw; + + if (in_cache_dim > out_cache_dim) { + char* read_ptr = cache.buffer + read_row_size - write_row_size + offset_size; + char* write_ptr = cache.buffer + offset_size; + + for (int i = 0; i < n_heads * _n_embed; i++) { + std::memmove(write_ptr, read_ptr, write_row_size); + read_ptr += read_row_size; + write_ptr += write_row_size; + } + } else { + const size_t block_size_delta = write_row_size - read_row_size; + + char* read_ptr = cache.buffer + (n_heads * _n_embed - 1) * read_row_size + offset_size; + char* write_ptr = cache.buffer + (n_heads * _n_embed - 1) * write_row_size + offset_size; + + for (int i = 0; i < n_heads * _n_embed; i++) { + std::memmove(write_ptr + block_size_delta, read_ptr, read_row_size); + std::memset(write_ptr, _pad_value, block_size_delta); + read_ptr -= read_row_size; + write_ptr -= write_row_size; + } + } + + return true; +} + +// Switch value cache from AR-m to AR-n (relative to ctx_size) +bool NewNSPKVManager::switchValueVariant(KVCache cache, int32_t m, int32_t n, int32_t offset) { + const size_t in_cache_dim = (m == _n_ctx) ? _n_ctx : _n_ctx - m; + const size_t out_cache_dim = _n_ctx - n; + const size_t n_heads = cache.n_heads; + + const size_t read_block_size = in_cache_dim * _n_embed * _bw; + const size_t write_block_size = out_cache_dim * _n_embed * _bw; + const size_t offset_size = offset * _n_embed * _bw; + + if (in_cache_dim > out_cache_dim) { + char* read_ptr = cache.buffer + read_block_size - write_block_size + offset_size; + char* write_ptr = cache.buffer + offset_size; + + for (int i = 0; i < n_heads; i++) { + std::memmove(write_ptr, read_ptr, write_block_size); + read_ptr += read_block_size; + write_ptr += write_block_size; + } + } else { + const size_t block_size_delta = write_block_size - read_block_size; + + char* read_ptr = cache.buffer + (n_heads - 1) * read_block_size + offset_size; + char* write_ptr = cache.buffer + (n_heads - 1) * write_block_size + offset_size; + + for (int i = 0; i < n_heads; i++) { + std::memmove(write_ptr + block_size_delta, read_ptr, read_block_size); + std::memset(write_ptr, _pad_value, block_size_delta); + read_ptr -= read_block_size; + write_ptr -= write_block_size; + } + } + + return true; +} + +// clang-format off +bool NewNSPKVManager::updateKey(KVCache cache, int32_t variant, int32_t n_update, int32_t offset, const std::vector& selected) { + // clang-format on + char* dst = cache.buffer; + char* src = cache.output_buffer; + + if (n_update < 0) { + const int32_t n_iter = cache.n_heads * _n_embed; + const int32_t iter_size = (_n_ctx - variant) * _bw; + const int32_t copy_size = -n_update * _bw; + + if (_mode == SHIFT_CONCAT) { + std::memmove(dst + copy_size, dst, n_iter * iter_size - copy_size); + std::memset(dst, _pad_value, copy_size); + } else if (_mode == POINTER_SHIFT) { + char* write_ptr = dst + offset * _bw + iter_size - copy_size; + for (int32_t i = 0; i < n_iter; i++) { + std::memset(write_ptr, _pad_value, copy_size); + write_ptr += iter_size; + } + } + + return true; + } + + const int32_t n_iter = cache.n_heads * _n_embed; + 
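+  // Key cache layout: one row of (_n_ctx - variant) entries per (head, embed) pair.
+  // SHIFT_CONCAT appends the n_update newest positions at the right edge of each row;
+  // POINTER_SHIFT instead writes them at the rolling pointer offset in the scratch
+  // region, so no per-row shift is needed.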
const int32_t iter_size = (_n_ctx - variant) * _bw; + const int32_t copy_size = n_update * _bw; + const int32_t out_size = variant * _bw; + + if (_mode == SHIFT_CONCAT) // Shift KV$ buffer if necessary + std::memmove(dst, dst + copy_size, n_iter * iter_size - copy_size); + + // Concatenate output into the KV$ buffers + char* read_ptr = src; // output_buffer + char* write_ptr = dst + offset * _bw + iter_size - ((_mode == POINTER_SHIFT) ? 0 : copy_size); + + if (selected.empty()) { + for (int32_t i = 0; i < n_iter; i++) { + std::memcpy(write_ptr, read_ptr, copy_size); + write_ptr += iter_size; + read_ptr += out_size; + } + } else { + for (int32_t i = 0; i < n_iter; i++) { + auto wp = write_ptr, rp = read_ptr; + for (auto sel : selected) { + for (int i = 0; i < _bw; i++) { + if (sel) *wp++ = *rp; + ++rp; + } + } + write_ptr += iter_size; + read_ptr += out_size; + } + } + + return true; +} + +// clang-format off +bool NewNSPKVManager::updateValue(KVCache cache, int32_t variant, int32_t n_update, int32_t offset, const std::vector& selected) { + // clang-format on + char* dst = cache.buffer; + char* src = cache.output_buffer; + + if (n_update < 0) { + const int32_t n_iter = cache.n_heads; + const int32_t iter_size = (_n_ctx - variant) * _n_embed * _bw; + const int32_t copy_size = -n_update * _n_embed * _bw; + if (_mode == SHIFT_CONCAT) { + std::memmove(dst + copy_size, dst, cache.n_heads * iter_size - copy_size); + std::memset(dst, _pad_value, copy_size); + } else if (_mode == POINTER_SHIFT) { + char* write_ptr = dst + offset * _n_embed * _bw + iter_size - copy_size; + for (int32_t i = 0; i < n_iter; i++) { + std::memset(write_ptr, _pad_value, copy_size); + write_ptr += iter_size; + } + } + + return true; + } + + const int32_t n_iter = cache.n_heads; + const int32_t iter_size = (_n_ctx - variant) * _n_embed * _bw; + const int32_t copy_size = n_update * _n_embed * _bw; + const int32_t out_size = variant * _n_embed * _bw; + + if (_mode == SHIFT_CONCAT) // Shift KV$ buffer if necessary + std::memmove(dst, dst + copy_size, cache.n_heads * iter_size - copy_size); + + // Concatenate output into the KV$ buffers + char* read_ptr = src; + char* write_ptr = dst + offset * _n_embed * _bw + iter_size; + if (_mode != POINTER_SHIFT) write_ptr -= copy_size; + if (selected.empty()) { + for (int i = 0; i < cache.n_heads; i++) { + std::memcpy(write_ptr, read_ptr, copy_size); + write_ptr += iter_size; + read_ptr += out_size; + } + } else { + for (int i = 0; i < cache.n_heads; i++) { + auto wp = write_ptr, rp = read_ptr; + for (auto sel : selected) { + if (sel) { + std::memcpy(wp, rp, _n_embed * _bw); + wp += _n_embed * _bw; + } + rp += _n_embed * _bw; + } + write_ptr += iter_size; + read_ptr += out_size; + } + } + return true; +} + +bool NewNSPKVManager::registerPointerOffset() { + int32_t variant = _req_state.variant; + int32_t ptr_offset = _req_state.ptr_offset; + __KVTRACE("qnn-kv : graph[{}] pointerShift({} @ AR-{})", _mgr_idx, ptr_offset, variant); + _register_pointer_fn(variant, ptr_offset * _bw); + + if (_threadpool != nullptr) { + const int rem = --_sync; + __KVTRACE("qnn-kv : graph[{}] pointerShift complete ({} remain)", _mgr_idx, rem); + if (rem == 0) updateState(); + } + return true; +} + +bool NewNSPKVManager::updateState() { + // clang-format off + __TRACE("qnn-kv : graph[{}] updateState to AR-{}(n_past={}, ptr={})", _mgr_idx, + _req_state.variant, _req_state.n_past, _req_state.ptr_offset); + // clang-format on + + if (_cur_state.variant != _req_state.variant) { + int idx = 0; + for (KVCache& cache : 
_kv_cache) { + const int32_t dim_size = _n_ctx - _req_state.variant; + cache.output_buffer = cache.buffer + dim_size * cache.n_heads * _n_embed * _bw; + + if (_mode == POINTER_SHIFT) + cache.output_buffer += cache.is_key ? _n_ctx * _bw : _n_ctx * _n_embed * _bw; + } + } + + _cur_state = _req_state; + _counter = _callback_fn(_mgr_idx); + return true; +} + +// Function executes on the threadpool - called once per thread. +// Assumes the lock is properly attained by this point +void NewNSPKVManager::runKVUpdateJob(int thread_idx) { + // clang-format off + __KVTRACE( + "qnn-kv : graph[{}] tid[{}] kv-update started. {} ", + _mgr_idx, thread_idx, modeStr(_req_mode)); + // clang-format on + int job_count = 1 + ((getNumKVTensors() - 1) / n_threads); // Number of jobs per thread + int end_idx = job_count * (thread_idx + 1); + if (end_idx > getNumKVTensors()) end_idx = getNumKVTensors(); + + for (int idx = job_count * thread_idx; idx < end_idx; idx++) { + KVCache& cache = _kv_cache[idx]; + + auto& [variant, n_past, ptr_offset, selected] = _cur_state; + const int32_t n_update = _req_state.n_past - n_past; + + if (cache.is_key) { + if (_req_mode == CLEAR_CACHE) clearBuffer(cache); + if (_req_mode == UPDATE_OUTPUT || _req_mode == UPDATE_AND_SET) { + updateKey(cache, variant, n_update, ptr_offset, _req_state.selected); + } + if (_req_mode == SET_VARIANT || _req_mode == UPDATE_AND_SET) { + switchKeyVariant(cache, variant, _req_state.variant, _req_state.ptr_offset); + } + } else { + if (_req_mode == CLEAR_CACHE) clearBuffer(cache); + if (_req_mode == UPDATE_OUTPUT || _req_mode == UPDATE_AND_SET) { + updateValue(cache, variant, n_update, ptr_offset, _req_state.selected); + } + if (_req_mode == SET_VARIANT || _req_mode == UPDATE_AND_SET) { + switchValueVariant(cache, variant, _req_state.variant, _req_state.ptr_offset); + } + } + } + + if (_threadpool != nullptr) { + const int rem = --_sync; + __KVTRACE("qnn-kv : graph[{}] tid[{}] kv-update ({} remain)", _mgr_idx, thread_idx, rem); + if (rem == 0) updateState(); + } else // Without threading, this is only called once so we can updateState() immediately + updateState(); +} + +void NewNSPKVManager::dispatchUpdate( + int32_t n_past, + int32_t variant, + const std::vector& selected +) { + // clang-format off + __KVTRACE("qnn-kv : graph[{}] dispatchUpdate AR-{}(n_past={}, ptr={}) -> AR-{}(n_past={})", + _mgr_idx, _cur_state.variant, _cur_state.n_past, _cur_state.ptr_offset, variant, n_past); + // clang-format on + + bool skip_update = false; + _req_state = {variant, n_past, _cur_state.ptr_offset, selected}; + + if (_req_state.n_past == 0) { + _req_mode = CLEAR_CACHE; + _req_state.ptr_offset = 0; + + // Nothing to be done iff + // - Requested variant is BERT Mode, i.e. takes no input (new_variant == _n_ctx) + // - Cache is already empty (n_past == 0) + if (_req_state.variant == _n_ctx || _cur_state.n_past == 0) _req_mode = NO_OP; + } else if (_req_state.n_past == _cur_state.n_past) { + _req_mode = SET_VARIANT; + // Nothing needs to be done iff + // - Cache is empty (n_past == 0). Might want to check for BERT->AR-1 + // - Requested variant is already set (new_variant == cur_variant) + // - Requested variant is BERT Mode, i.e. 
takes no input (new_variant == _n_ctx) + if (_cur_state.n_past == 0 || _req_state.variant == _n_ctx || + _req_state.variant == _cur_state.variant) + _req_mode = NO_OP; + if (_req_state.variant == _n_ctx) _req_state.ptr_offset = 0; + + } else if (_req_state.n_past < _cur_state.n_past) { + _req_mode = UPDATE_OUTPUT; + if (_mode == POINTER_SHIFT) + _req_state.ptr_offset -= (_cur_state.n_past - _req_state.n_past); + + } else if (_req_state.variant == _cur_state.variant) { // UPDATE_OUTPUT + _req_mode = UPDATE_OUTPUT; + if (_cur_state.variant == _n_ctx) + _req_mode = NO_OP; + else if (_mode == POINTER_SHIFT) + _req_state.ptr_offset += (_req_state.n_past - _cur_state.n_past); + + } else { + _req_mode = UPDATE_AND_SET; + + if (_cur_state.variant == _n_ctx) + _req_mode = SET_VARIANT; + else if (_req_state.variant == _n_ctx) { + _req_state.n_past = 0; + _req_mode = NO_OP; // If we're switching to BERT-Mode, nothing to do + } + + if (_req_mode == UPDATE_AND_SET && _cur_state.variant != _n_ctx && _mode == POINTER_SHIFT) + _req_state.ptr_offset += (_req_state.n_past - _cur_state.n_past); + } + + // clang-format off + __KVTRACE("qnn-kv : graph[{}] Processing {} AR-{}(n_past={}, ptr={})", + _mgr_idx, modeStr(_req_mode), _req_state.variant, _req_state.n_past, _req_state.ptr_offset); + // clang-format on + + if (_req_mode == NO_OP) { + // TODO: Think about this case a bit more. Any other cases we want to registerPtrOffset()? + bool needs_register_ptr = + (_mode == POINTER_SHIFT && (_cur_state.variant != _req_state.variant || + _cur_state.ptr_offset != _req_state.ptr_offset)); + + if (needs_register_ptr) { + if (_threadpool != nullptr) { + _sync += 1; + registerPointerOffset(); + } else { + registerPointerOffset(); + updateState(); + } + } else + updateState(); + return; + } + + if (_threadpool != nullptr) { + _sync += _update_jobs.size(); + _threadpool->enqueue(_update_jobs); + } else { + runKVUpdateJob(0); + if (_mode == POINTER_SHIFT) registerPointerOffset(); + updateState(); + } +} + +bool NewNSPKVManager::loadCache( + std::ifstream* fs, + bool is_key, + int32_t n_valid, + int32_t variant, + int32_t n_heads +) { + __TRACE("qnn-kv : KVManager[{}] load cache", _mgr_idx); + const size_t cache_dim = (variant == _n_ctx) ? _n_ctx : _n_ctx - variant; + const size_t iter_size = (is_key) ? cache_dim * _bw : cache_dim * _n_embed * _bw; + const size_t copy_size = (is_key) ? n_valid * _bw : n_valid * _n_embed * _bw; + + for (KVCache& cache : _kv_cache) { + if (cache.is_key != is_key) continue; + + clearBuffer(cache); + const int n_iter = (is_key) ? cache.n_heads * _n_embed : cache.n_heads; + char* data = (char*)cache.buffer + iter_size - copy_size; + for (int i = 0; i < n_iter; i++) { + fs->read(data, copy_size); + data += iter_size; // Jump to the next row/block (depending on type) + } + + if (n_heads > cache.n_heads) + fs->seekg((n_heads - cache.n_heads) * _n_embed * n_valid * _bw, std::ios::cur); + } + + _req_state = {variant, n_valid, 0}; + updateState(); + + return true; +} + +bool NewNSPKVManager::dumpCache(std::ofstream* fs, bool is_key, int32_t n_valid, int32_t n_heads) { + __TRACE("qnn-kv : graph[{}] dump cache", _mgr_idx); + const int32_t variant = _cur_state.variant; + const int32_t ptr_offset = _cur_state.ptr_offset; + const size_t cache_dim = (variant == _n_ctx) ? _n_ctx : _n_ctx - variant; + + const size_t iter_size = (is_key) ? cache_dim * _bw : cache_dim * _n_embed * _bw; + const size_t copy_size = (is_key) ? n_valid * _bw : n_valid * _n_embed * _bw; + const size_t offset_size = (is_key) ? 
ptr_offset * _bw : ptr_offset * _n_embed * _bw; + + for (KVCache& cache : _kv_cache) { + if (cache.is_key != is_key) continue; + + const int n_iter = (is_key) ? cache.n_heads * _n_embed : cache.n_heads; + char* data = (char*)cache.buffer + offset_size + iter_size - copy_size; + for (int i = 0; i < n_iter; i++) { + fs->write(data, copy_size); + data += iter_size; // Jump to the next row/block (depending on type) + } + + if (n_heads > cache.n_heads) + fs->seekp((n_heads - cache.n_heads) * _n_embed * n_valid * _bw, std::ios::cur); + } + return true; +} +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvmanager.hpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvmanager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f1208aa5364193f472c3bd88e6cbd00a7a813854 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-kvmanager.hpp @@ -0,0 +1,163 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include + +#include "QnnApi.hpp" +#include "IOTensor.hpp" +#include "qnn-utils.hpp" + +#include + +#include "qualla/env.hpp" + +namespace qualla { + + +enum KVUpdateMode { + NO_OP = 0x0, + CLEAR_CACHE = 0x1, + SET_VARIANT = 0x2, + UPDATE_OUTPUT = 0x4, + UPDATE_AND_SET = 0x8 +}; + +static std::string modeStr(KVUpdateMode mode) { + if (mode == CLEAR_CACHE) return "CLEAR_CACHE"; + if (mode == SET_VARIANT) return "SET_VARIANT"; + if (mode == UPDATE_OUTPUT) return "UPDATE_OUTPUT"; + if (mode == UPDATE_AND_SET) return "UPDATE_AND_SET"; + return "NO_OP"; +} + +struct KVCache { + bool is_key; + char* buffer; + char* output_buffer; + int32_t n_heads; + KVCache() {} + KVCache(bool is_key_val, char* buffer_val, char* output_buffer_val, int32_t n_heads_val) : + is_key(is_key_val), buffer(buffer_val), output_buffer(output_buffer_val), n_heads(n_heads_val) {} +}; + +class NewNSPKVManager { + private: + Env& _env; + int _mgr_idx; // Identify KVManager in the logs + + ThreadPool* _threadpool{nullptr}; // Threadpool for async background processing + std::atomic_int _sync{0}; + + std::vector> _update_jobs; + + KVManagerMode _mode{POINTER_SHIFT}; + + std::vector _kv_cache; // + std::vector _key_scales, _value_scales; + int32_t _max_n_heads{0}; + + // Caputre states + struct KVManagerState { + int32_t variant; + int32_t n_past; + int32_t ptr_offset; + std::vector selected; + }; + + KVManagerState _cur_state{-1, -1, 0, {}}; + KVManagerState _req_state{-1, -1, 0, {}}; + KVUpdateMode _req_mode{NO_OP}; + + int32_t _counter{-1}; // Auto-increment variable for syncing updates + int32_t n_threads{1}; + + // Variant (n) stores AR-n for which the cache is currently formatted + // The following variables are strictly dependent on variant n. 
Make sure to update accordingly + size_t key_output_offset, value_output_offset; + + // Parse KV$ Tensor names here - supports past_{key,value}_{layer_idx}[_h{head_idx}]_{in,out} + std::tuple parseKVTensorName(std::string name); + + // KV Manager Utility functions + void clearBuffer(KVCache cache) { + std::memset(cache.buffer, _pad_value, cache.n_heads * _n_ctx * _n_embed * _bw); + } + + bool switchKeyVariant(KVCache cache, int32_t m, int32_t n, int32_t ptr_offset); + bool switchValueVariant(KVCache cache, int32_t m, int32_t n, int32_t ptr_offset); + bool updateKey( + KVCache cache, + int32_t variant, + int32_t n_update, + int32_t offset, + const std::vector& selected + ); + bool updateValue( + KVCache cache, + int32_t variant, + int32_t n_update, + int32_t offset, + const std::vector& selected + ); + + // For pointer shift + std::map>* _alloc_info; + bool registerPointerOffset(); // Register offsets for POINTER_SHIFT + + std::function _callback_fn; + std::function _register_pointer_fn; + + public: + uint8_t _pad_value; // Assumes all tensors have a common zero point @ 128 + int8_t _bw{1}; // Bitwidth of KV$ values. Defaults to 8-bit KV$ + int32_t _n_embed{-1}; + int32_t _n_ctx{-1}; + + // clang-format off + NewNSPKVManager( int idx, Env& env, ThreadPool* threadpool, IOTensor* buffer_mgr, + QnnUtils::TensorMap &tensor_specs, int32_t ctx_size, int32_t embed_dim, KVManagerMode mode); + // clang-format on + ~NewNSPKVManager(); + + bool loadCache( + std::ifstream* fs, + bool is_key, + int32_t n_valid, + int32_t variant, + int32_t n_heads + ); + bool dumpCache(std::ofstream* fs, bool is_key, int32_t n_valid, int32_t n_heads); + + bool updateState(); + void runKVUpdateJob(int thread_idx); // Worker thread function + void setTensorAllocInfo(std::map>* alloc_info) { + _alloc_info = alloc_info; + } + void registerCallback(std::function callback_fn) { + _callback_fn = callback_fn; + } + + // TODO: Cleanup and remove this function. KVManager should handle all alloc/register for KV$ + void registerPointerOffsetFn(std::function register_fn) { + _register_pointer_fn = register_fn; + } + + void dispatchUpdate(int32_t new_n_past, int32_t variant, const std::vector& selected); + + const size_t getNumKVTensors() const { return _kv_cache.size(); } + const int32_t getMaxNHeads() const { return _max_n_heads; } + int32_t getCurOffset() { return _cur_state.ptr_offset; } + int32_t getCurVariant() { return _cur_state.variant; } + int32_t getNPast() { return _cur_state.n_past; } + std::vector& getKeyScales() { return _key_scales; } + std::vector& getValueScales() { return _value_scales; } +}; + +} // namespace qualla diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-model.cpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-model.cpp new file mode 100644 index 0000000000000000000000000000000000000000..24da5f8473e41e9fb61bf91b2d4940977873c5d8 --- /dev/null +++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-model.cpp @@ -0,0 +1,2626 @@ +//============================================================================== +// +// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== + +#define _USE_MATH_DEFINES // Used for M_PI + +#include "qualla/env.hpp" +#include "qualla/detail/timer.hpp" +#include "qualla/detail/cache-file.hpp" + +#include "fmt/format.h" +#include "fmt/ranges.h" +#include "fmt/os.h" +#include +#include "nsp-model.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include "fp16/fp16.h" + +namespace fs = std::filesystem; + +#define __INFO(__fmt, ...) _env.logger().post(Logger::INFO, fmt::format(__fmt, ##__VA_ARGS__)) +#define __WARN(__fmt, ...) _env.logger().post(Logger::WARN, fmt::format(__fmt, ##__VA_ARGS__)) +#define __ERROR(__fmt, ...) _env.logger().post(Logger::ERROR, fmt::format(__fmt, ##__VA_ARGS__)) +#define __KPIS(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_KPIS, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __DEBUG(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_DEBUG, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) +#define __TRACE(__fmt, ...) \ + _env.logger().post(Logger::ENGINE_TRACE, [&]() { return fmt::format(__fmt, ##__VA_ARGS__); }) + +namespace qualla { + +QnnNspModel::QnnNspModel(Env& env, const Params& params) + : _env(env), model_basedir(params.model_basedir) { + // Initialize QnnAPI + m_qnnApi = std::unique_ptr(new QnnApi()); + + spill_fill_buffer_size = params.spill_fill_bufsize; + m_kv_dim = params.kv_dim; + m_use_mmap = params.use_mmap; + m_use_async_Init = params.use_async_Init; + mmap_budget = params.mmap_budget; + m_ctx_size = params.ctx_size; + m_pad_token = params.pad_token; + lmhead_weight_dir = params.lmhead_weight_dir; + graph_switching = params.graph_switching; + load_select_graphs = params.load_select_graphs; + lora_conf = params.lora_config_type; + embedding_length = params.embedding_length; + embedding_datatype = params.embedding_datatype; + m_disableKvCache = params.disable_kv_cache; + m_embd_size = params.n_embd; + m_modelArchitectureType = params.modelArchitectureType; + // Positional encoding parameters + m_positional_encoding = params.positional_encoding_params; + if (m_positional_encoding.type == PositionalEncoding::ROPE) // Save m_pos_dim for easy access + m_pos_dim = m_positional_encoding.rope_params.dims; + + // Debug flags + _debug_path = params.debug_path; + _debug_specs = params.debug_specs; + _debug_tensors = params.debug_tensors; + _debug_outputs = params.debug_outputs; + _debug_qnn = params.debug_qnn; + + _backend_lib = params.backend_lib; + _backend_ext_conf = params.backend_ext_conf; + + if (graph_switching && !m_use_mmap) + __WARN("Graph switching with non-mmaped implementation can cause high sustained memory usage" + ); + + variant_latency = params.variant_latency; + + if(m_modelArchitectureType == ModelArchitectureType::ENCODER){ + m_pooled_output = params.pooled_output; + } + + exec_select_graphs = params.exec_select_graphs; + if (!exec_select_graphs.empty()) + __DEBUG("qnn-htp : Execute selected graphs = {}", exec_select_graphs); + + _kv_update_method = (params.kv_update_method == "POINTER_SHIFT") ? POINTER_SHIFT : SHIFT_CONCAT; + __DEBUG("qnn-htp : NSP KV$ Update Method = {}", + (_kv_update_method == POINTER_SHIFT) ? "POINTER_SHIFT" : "SHIFT_CONCAT"); + + // Set up filename list. 
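+  // Each entry is later loaded as a cached context binary; relative paths resolve
+  // against model_basedir, and any missing file aborts construction immediately.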
+ for (auto& i : params.model_list) { + fs::path model_path = fs::path(i); + if (model_path.is_relative()) model_path = model_basedir / fs::path(i); + if (!fs::is_regular_file(model_path)) { + __ERROR("NSPModel: Can't access model file : {}", model_path.string()); + throw std::runtime_error("NSPModel: Can't access model file : " + model_path.string()); + } + model_filelist.push_back(model_path.string()); + } + + if (lora_conf != LoraConfigType::LORA_DISABLE) { + lora_config.insert(params.lora_param.begin(), params.lora_param.end()); + } + + if (params.n_threads > 0) { + _threaded = true; + _cpumask = params.cpumask; + __DEBUG("qnn-htp: starting threadpool : n_threads {} params. {:#x} poll {}", + params.n_threads, + _cpumask, + params.poll); + threadpool.start(params.n_threads, _cpumask, params.poll); + } + + // Initialize QNN IO Tensor + m_ioTensor = std::unique_ptr(new IOTensor( + m_sharedBuffer ? BufferAlloc::SHARED_BUFFER : BufferAlloc::DEFAULT, + m_sharedBuffer ? m_qnnApi->getQnnInterfaceVer() : nullptr + )); + + m_qnnApi->setIOTensorBufferMgr(m_ioTensor.get()); + m_qnnApi->setKVDim(m_kv_dim); + m_qnnApi->setContextSize(m_ctx_size); + m_qnnApi->setKVUpdateMethod(_kv_update_method); + + if (params.debug_specs || params.debug_tensors) { + if (!fs::exists(params.debug_path) && !fs::create_directories(params.debug_path)) + throw std::runtime_error("Could not create debug directory : " + params.debug_path); + } +} + +QnnNspModel::~QnnNspModel() { + qualla::Timer start; + + if (_threaded) { + __DEBUG("qnn-htp: stopping threadpool"); + threadpool.stop(); // Stop Threadpool first + } + + // Free cached RoPE memory + if (rope_sin != nullptr) free(rope_sin); + if (rope_cos != nullptr) free(rope_cos); + + __DEBUG("qnn-htp: model destruct complete: {} usec", start.elapsed_usec()); +} +bool QnnNspModel::float32ToFloat16(uint8_t *out, float *in, size_t numElements) { + if(!numElements) return false; + uint16_t *temp = (uint16_t *)out; + for(size_t i = 0; i < numElements; i++){ + temp[i] = fp16_ieee_from_fp32_value(in[i]); + } + return true; +} +// Given a filename, initializeModel load and initializes QNN runtime libraries and the model +bool QnnNspModel::initializeModel(void) { + qualla::Timer start; + + __DEBUG("qnn-htp: model init start"); + + // Default backends +#ifdef _WIN32 + const std::string m_backend = _backend_lib.empty() ? "QnnHtp.dll" : _backend_lib; + const std::string m_systemLib = "QnnSystem.dll"; + const std::string backendExtensionsLibPath = "QnnHtpNetRunExtensions.dll"; +#else + const std::string m_backend = _backend_lib.empty() ? "libQnnHtp.so" : _backend_lib; + const std::string m_systemLib = "libQnnSystem.so"; + const std::string backendExtensionsLibPath = "libQnnHtpNetRunExtensions.so"; +#endif +#ifdef QUALLA_INTERNAL_QNN_SDK + if (_backend_ext_conf.empty()) { + __INFO("No backend extension config provided"); + } + fs::path m_backendExtensionsConfigPath = fs::path(_backend_ext_conf); +#else + fs::path m_backendExtensionsConfigPath = + _backend_ext_conf.empty() ? 
fs::path("data") / "htp_backend_ext_config.json" + : fs::path(_backend_ext_conf); + + if (m_backendExtensionsConfigPath.is_relative()) + m_backendExtensionsConfigPath = fs::path(model_basedir) / m_backendExtensionsConfigPath; + + if (!fs::is_regular_file(m_backendExtensionsConfigPath)) { + __ERROR("Cannot access {}", m_backendExtensionsConfigPath.string()); + return false; + } +#endif + __INFO("Backend library : {}", m_backend); + __INFO("System library : {}", m_systemLib); + __INFO("Model dir : {}", model_basedir.string()); + __INFO("Model files : {}", model_filelist); + __INFO("Backend extensions lib path : {}", backendExtensionsLibPath); + __INFO("Backend extensions config path : {}", m_backendExtensionsConfigPath.string()); + + if (!m_qnnApi->initialize( + m_backend, + model_filelist, + BackendExtensionsConfigs( + backendExtensionsLibPath, m_backendExtensionsConfigPath.string() + ), + PerfProfile::BURST, + ContextConfigs(Qnn_Priority_t::QNN_PRIORITY_DEFAULT), + {}, // graphConfigs + true, // loadFromCachedBinary + m_systemLib, // systemLibraryPath + false, + spill_fill_buffer_size, + m_use_mmap, + m_use_async_Init, + mmap_budget, + _debug_qnn, + graph_switching, + exec_select_graphs, + load_select_graphs + )) { + __ERROR("qnn-api initialization failed!"); + return false; + } + + int32_t n_splits = 0; + m_num_graphs = m_qnnApi->getGraphsCount(); + + __INFO("qnn-api initialized with {} graph(s)", m_num_graphs); + + GraphInfo_t** graphs_info = m_qnnApi->getGraphsInfo(); + m_variant_list.reserve(m_num_graphs); + std::map> graph_names; + for (size_t graph_idx = 0; graph_idx < m_num_graphs; graph_idx++) { + GraphInfo_t* const graph_info = graphs_info[graph_idx]; + GraphVariant graph(graph_info, m_qnnApi->getContexts(graph_info), m_ctx_size, m_layerNames); + __DEBUG("qnn-htp: Graph {}", graph.graph_name); + + if (!variant_latency.empty() && !variant_latency.contains(graph.n_tokens)) { + __WARN("qnn-htp: Disabling {} based on conf file", graph.graph_name); + continue; + } + + if (exec_select_graphs.size() != 0 && + std::find(exec_select_graphs.begin(), exec_select_graphs.end(), graph.graph_name) == + exec_select_graphs.end()) { + __DEBUG("qnn-htp: Graph {} is not selected to execute based on conf file", + graph.graph_name); + continue; + } + m_variant_list.emplace_back(graph); + n_splits = std::max(n_splits, ++nsp_graph_count[graph.n_tokens]); + graph_names[graph.n_tokens].push_back(graph.graph_name); + m_graph_map[std::string(graph_info->graphName)] = &m_variant_list.back(); + } + + if (exec_select_graphs.size() != 0 && graph_names.empty()) { + __ERROR("No matching graphs based on conf file"); + } + + // Create NSPGraph for each splits + m_nsp_graphs.reserve(n_splits); + for (int idx = 0; idx < n_splits; idx++) { + m_nsp_graphs.emplace_back( + idx, _env, m_ctx_size, m_qnnApi.get(), m_ioTensor.get(), _threaded + ); + m_nsp_graphs.back().setDebugMode(_debug_specs, _debug_tensors, _debug_path); + } + + // Insert all GraphVariants into corresponding NSPGraph + for (auto& [n_tokens, graphs] : graph_names) { + std::sort(graphs.begin(), graphs.end()); + for (int idx = 0; idx < graphs.size(); idx++) + m_nsp_graphs.at(idx).addGraph(m_graph_map.at(graphs[idx])); + } + + if (_debug_specs) dumpTensorSpecs(); + + { + __INFO("qnn-htp: Graphs loaded (AR-n: #splits): {}", nsp_graph_count); + + // Check if latency map matches the graphs loaded + if (!variant_latency.empty()) { + for (auto [variant, latency] : variant_latency) { + if (!nsp_graph_count.contains(variant)) { + __ERROR("Latency map (AR-n: 
#latency_ms): {}", variant_latency);
+          __ERROR("AR-{} present in latency map but not loaded!", variant);
+          __ERROR("Fix latency-map in the conf file, must map from AR-n to latency (ms)");
+          return false;
+        }
+      }
+    }
+  }
+
+  __DEBUG("qnn-htp: Model Init complete: {} usec", start.elapsed_usec());
+
+  return true;
+}
+
+// Once the model has been loaded, initialize IO Tensors
+// m_ioTensors is initialized by the context for now
+bool QnnNspModel::initializeIOTensors() {
+
+  if (m_use_async_Init == false) { // IO Tensor Mem Registration is already done within
+                                   // model initialization by Qnn_API for Sync Init.
+
+    // Set lmHeadWeightsEnabled and loraWeightsEnabled
+    _lmhead_weight_input = m_qnnApi->getLmHeadWeightInputEnabled();
+    _lora_enabled = m_qnnApi->getLoraWeightEnabled();
+    for (auto it = nsp_graph_count.rbegin(); it != nsp_graph_count.rend(); ++it) {
+      for (QnnNspGraph& graph : m_nsp_graphs) {
+        // TensorAllocInfo is added to each NSP graph.
+        // Needed by POINTER_SHIFT registration during execute.
+        graph.tensor_alloc_info = m_qnnApi->getTensorAllocInfo();
+        if (graph.tensor_alloc_info == NULL) {
+          __ERROR("Error Tensor Allocation Failed.");
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  // This path is used when async init is enabled.
+  qualla::Timer start;
+
+  __DEBUG("qnn-htp: init IO tensors start");
+
+  // Ideally, we should create and initialize m_ioTensor for each context, but we want to
+  // be able to see/use all the buffers in every context so that they can be connected
+  // with each other. Hence, we are using only the first context to initialize the m_ioTensor
+  // and use it for all graphs/contexts.
+  __DEBUG("qnn-htp: init IO tensor using {}", m_graph_map.begin()->first);
+  if (true != m_ioTensor->initialize(m_graph_map.begin()->second->context_handle)) {
+    __ERROR("qnn-htp: failure to initialize IOTensor");
+    return false;
+  }
+
+  // Technical note: unordered_map is faster than map, but map makes debug logs easier to read.
+  // The runtime impact shouldn't be very large since max size < #tensors
+
+  typedef int CtxBitVector;
+  // Maps context bitVector to a map{tensor_name -> max_tensor_size}
+  std::map<CtxBitVector, std::map<std::string, size_t>> ctx_alloc_map;
+  // Maps tensor_name to context bitVector, each bit representing a context the tensor exists in
+  std::map<std::string, CtxBitVector> tensor_ctx_map;
+  // Maps a ContextHandle to a one-hot encoded bitVector (e.g. 1, 2, 4, ...)
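+  // OR-ing these one-hot values per tensor yields the set of contexts that share a
+  // tensor name, which becomes the grouping key for ctx_alloc_map below.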
+ std::map ctx_to_hash; + + // Iterate over all tensors in all GraphVariants to figure out allocations + for (auto& variant : m_variant_list) { + // Map the context handle to a hashed bitVector + if (!ctx_to_hash.contains(variant.context_handle)) { + ctx_to_hash[variant.context_handle] = 1 << ctx_to_hash.size(); + } + for (auto& tensor_specs : {variant.input_specs, variant.output_specs}) { + for (auto& [tname, tspec] : tensor_specs) { + size_t size = tspec.dims.getAlignedSize(); + CtxBitVector tcontext = ctx_to_hash[variant.context_handle]; + + // Check if it's LoRA enabled model + if (!_lora_enabled && tname.find("lora") != std::string::npos) _lora_enabled = true; + // Check if graph has lmhead weight input + if (!_lmhead_weight_input && tname.compare("weight") == 0) + _lmhead_weight_input = true; + + // Allocate KV Tensors as in+out + if (tname.starts_with("past_")) { + if (tname.ends_with("_in")) continue; // kv_in is processed along with kv_out + + // For kv_out, add the size of kv_in as well + const std::string tname_in = tname.substr(0, tname.rfind('_')).append("_in"); + if (auto tensor = variant.getInput(tname_in)) + size += tensor->dims.getAlignedSize(); + + d_kv = QnnUtils::DataType(tspec.tensor); + + // Allocate extra buffer for pointer shift + // 1024-n for keys (1024-n)*128 for values + // For aligned size, we might as well use 1024 and 128*1024 + if (_kv_update_method == POINTER_SHIFT) + size += (tname.starts_with("past_key")) ? m_ctx_size * d_kv.bw() + : m_ctx_size * m_kv_dim * d_kv.bw(); + } + + if (tensor_ctx_map.contains(tname)) { // For duplicate tensor names, link them + CtxBitVector context_bitvec = tensor_ctx_map.at(tname); + size = std::max(ctx_alloc_map[context_bitvec][tname], size); + if ((context_bitvec & tcontext) == 0) // Set of contexts needs to be updated + ctx_alloc_map[context_bitvec].erase(tname); + + tcontext |= context_bitvec; + } + + ctx_alloc_map[tcontext][tname] = size; + tensor_ctx_map[tname] = tcontext; + } + } + + // Cleanup is essential in case of very large number of splits + for (auto it = ctx_alloc_map.cbegin(); it != ctx_alloc_map.cend();) + it = (it->second.empty()) ? ctx_alloc_map.erase(it) : ++it; + } + + + + _env.logger().compose(Logger::MALLOC_DEBUG, [&](Logger::Helper w) { + for (auto& [tcontext, tensor_alloc_map] : ctx_alloc_map) { + w.write(fmt::format("qnn-htp: ctx_alloc_map[{}] = {{", tcontext)); + for (auto& [tname, tsize] : tensor_alloc_map) + w.write(fmt::format("\t{} : {},", tname, tsize)); + w.write("}"); + } + }); + + // Calculate total allocation sizes and offset of each tensor within its allocated buffer + if (m_ioTensor->allocateBuffers(ctx_alloc_map, tensor_alloc_info) == false) return false; + + _env.logger().compose(Logger::MALLOC_DEBUG, [&](Logger::Helper w) { + w.write("tensor_alloc_info = {"); + for (auto& [tname, toffset] : tensor_alloc_info) + w.write(fmt::format("\t{}: [{}, {}],", tname, toffset.first, toffset.second)); + w.write("}"); + }); + + // For each variant, map tensor name to its allocated buffer, i/o and offset within the buffer + // TODO: Check why we aren't just looping over all variants here! 
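+  // Note: each past_*_out tensor shares one allocation with its matching past_*_in;
+  // the _out view begins right after the _in region (plus the POINTER_SHIFT scratch
+  // area when that update method is active).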
+ for (auto it = nsp_graph_count.rbegin(); it != nsp_graph_count.rend(); ++it) { + + for (QnnNspGraph& graph : m_nsp_graphs) { + + // TODO: Remove this reference + graph.tensor_alloc_info = &tensor_alloc_info; + + auto variant = graph[it->first]; + + std::map> graph_allocs; + + + for (auto& [tname, tspec] : variant->input_specs) { + if (tname.starts_with("past_")) continue; + auto& [alloc_idx, offset] = tensor_alloc_info.at(tname); + graph_allocs[tname] = {alloc_idx, offset, tspec.dims.getAlignedSize()}; + } + + for (auto& [tname, tspec] : variant->output_specs) { + size_t kv_offset = 0; + size_t size = tspec.dims.getAlignedSize(); + + auto& [alloc_idx, offset] = tensor_alloc_info.at(tname); + if (tname.starts_with("past_")) { + auto in_name = tname.substr(0, tname.rfind("_")).append("_in"); + if (auto kv_in = variant->getInput(in_name)) { + kv_offset = kv_in->dims.getAlignedSize(); + if (_kv_update_method == POINTER_SHIFT) + kv_offset += (tname.starts_with("past_key")) + ? m_ctx_size * d_kv.bw() + : m_ctx_size * m_kv_dim * d_kv.bw(); + graph_allocs[in_name] = {alloc_idx, offset, kv_offset}; + } + } + + graph_allocs[tname] = {alloc_idx, offset + kv_offset, size}; + } + + if (!m_ioTensor->mapFusedBufferOffset( + variant->graph_info, variant->context_handle, graph_allocs + )) { + + __ERROR("Error mapping tensor to allocation buffers"); + return false; + } + } + } + + + + __DEBUG("qnn-htp: init IO tensors complete : {} usec", start.elapsed_usec()); + + return true; +} + +static bool checkShape( + const std::string& tensor_name, + const QnnUtils::Tensor* tensor, + int32_t height, + int32_t width, + int32_t channel, + int32_t bitWidth, + std::vector>& errors +) { + if (tensor == nullptr) return true; + const QnnUtils::Dims& tDims = tensor->dims; + + if ((height == -1 || height == tDims.height) && (width == -1 || width == tDims.width) && + (channel == -1 || channel == tDims.channel) && + (bitWidth == -1 || bitWidth == tDims.bitWidth)) + return true; + + std::stringstream err_msg; + err_msg << "Expected [ " << height << ", " << width << ", " << channel << "] " + << "bitWidth=" << bitWidth << ". Found [ " << tDims.height << ", " << tDims.width + << ", " << tDims.channel << "] " + << "bitWidth=" << tDims.bitWidth; + + errors.push_back({"ShapeError", tensor_name, err_msg.str()}); + return false; +} + +// Run all validations for the model here so we can exit early +bool QnnNspModel::validateModel() { + // Checks we will be running + // 1a. input_ids or inputs_embeds exists in the first split + // 1b. token_type_ids should exists in case of Bert + // 2. logits exists in the last split + // 3. Shapes for all named tensors are correct + // 4. All tensors with identical names (incl kv_in/kv_out) have identical quantization params + // Missing check : Shape of tensor between splits match up + + // Support for 16-bit KV Tensors is temporarily disabled + // If you need this, please refer to past commits (QuaLLA <= v0.3.22) + + // Important : These variables need to be set correctly + // m_vocab_size - Calculated as max(logits.shape) since len() + // m_kv_dim - Calculated in this function before usage + // m_ctx_size - Provided by the user as n_ctx + + std::vector> errors; + + QnnUtils::Tensor* tt; + + //default input type is token + m_inputType = InputType::TOKENS; + + // Check 1 - input layer exists + for (auto& [n_tokens, variant] : m_nsp_graphs.front().variants) { + // Update model expectations for E2T if an inputs_embeds layer is present. 
marks the input Type + if ((tt = variant->getInput("inputs_embeds")) != nullptr) { + m_layerNames[LayerType::INPUT] = "inputs_embeds"; + m_inputType = InputType::EMBEDDINGS; + } + if ((tt = variant->getInput(m_layerNames[LayerType::INPUT])) == nullptr) { + errors.push_back({variant->graph_name, m_layerNames[LayerType::INPUT], "Tensor not found"}); + } else { + input_bitWidth = tt->dtype.bw(); + checkShape(m_layerNames[LayerType::INPUT], tt, -1, -1, -1, input_bitWidth, errors); + + if (embedding_datatype == "float32") { + m_embeddingBufferSize = m_embd_size * sizeof(float); + } else { + m_embeddingBufferSize = m_embd_size * input_bitWidth; + } + + // For embedding inputs, the expected count is multiplied by the embedding size. + size_t expectedElementCount = (m_inputType == InputType::TOKENS) ? n_tokens : n_tokens * m_embd_size; + if (tt->dims.getNumElements() != expectedElementCount) + errors.push_back({variant->graph_name, m_layerNames[LayerType::INPUT], "Wrong input shape"}); + } + } + + // Check 1b - In case of BERT :-> token_type_ids + if(m_modelArchitectureType == ModelArchitectureType::ENCODER) { + for (auto &[n_tokens, variant]: m_nsp_graphs.front().variants) { + if ((tt = variant->getInput(m_layerNames[LayerType::TOKEN_TYPE_IDS])) == nullptr) + errors.push_back({variant->graph_name, m_layerNames[LayerType::TOKEN_TYPE_IDS], "Tensor not found"}); + else { + checkShape(m_layerNames[LayerType::TOKEN_TYPE_IDS], tt, -1, -1, -1, 4, errors); + if (tt->dims.getNumElements() != n_tokens) + errors.push_back({variant->graph_name, m_layerNames[LayerType::TOKEN_TYPE_IDS], + "Wrong token_type_ids shape"}); + } + } + } + + // Check 2 - In case of LLama :-> logits exists + // In case of BERT :-> pooled_output & sequence_outputs exists + for (auto& [n_tokens, variant] : m_nsp_graphs.back().variants) { + if (m_modelArchitectureType == ModelArchitectureType::ENCODER) { + if ((tt = variant->getOutput(m_layerNames[LayerType::POOL_OUTPUT])) == nullptr) + errors.push_back({variant->graph_name, m_layerNames[LayerType::POOL_OUTPUT], "Tensor not found"}); + else { + if (tt->dims.getNumElements() != m_embd_size) + errors.push_back( + {variant->graph_name, m_layerNames[LayerType::POOL_OUTPUT], "Wrong pooled_outputs shape"}); + + } + if (!m_pooled_output) { + if ((tt = variant->getOutput(m_layerNames[LayerType::SEQ_OUTPUT])) == nullptr) + errors.push_back({variant->graph_name, m_layerNames[LayerType::SEQ_OUTPUT], "Tensor not found"}); + else { + if (tt->dims.getNumElements() != n_tokens * m_embd_size) + errors.push_back({variant->graph_name, m_layerNames[LayerType::SEQ_OUTPUT], + "Wrong sequence_output shape"}); + + } + } + } else { + if ((tt = variant->getOutput(m_layerNames[LayerType::OUTPUT])) == nullptr) + errors.push_back({variant->graph_name, m_layerNames[LayerType::OUTPUT], "Tensor not found"}); + else { + if (m_vocab_size == -1) m_vocab_size = tt->dims.getMaxDim(); + if (tt->dims.getNumElements() != m_vocab_size && + tt->dims.getNumElements() != n_tokens * m_vocab_size) + errors.push_back({variant->graph_name, m_layerNames[LayerType::OUTPUT], "Wrong logits shape"}); + } + } + } + + // Check 3 - Shapes for all names tensors are correct + if (m_kv_dim == -1) { // Deduce KV$ embed_dim if not already available + for (auto& variant : m_variant_list) { + for (auto& [tname, tspec] : variant.output_specs) + if (tname.starts_with("past_key")) m_kv_dim = tspec.dims.width; + if (m_kv_dim != -1) break; + } + } + + for (auto& variant : m_variant_list) { + auto& n_tokens = variant.n_tokens; + 
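+    // Decoder graphs expect a causal [1, n_tokens, n_ctx] attention mask, whereas
+    // encoder (BERT-style) graphs take a single [1, 1, n_ctx] validity row.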
if(m_modelArchitectureType == ModelArchitectureType::ENCODER){ + checkShape(m_layerNames[LayerType::ATTN_MASK], variant.getInput(m_layerNames[LayerType::ATTN_MASK]), 1, 1, m_ctx_size, -1, errors); + } + else{ + checkShape(m_layerNames[LayerType::ATTN_MASK], variant.getInput(m_layerNames[LayerType::ATTN_MASK]), 1, n_tokens, m_ctx_size, -1, errors); + } + if (m_positional_encoding.type == PositionalEncoding::ROPE) { + checkShape(m_layerNames[LayerType::POS_SIN], variant.getInput(m_layerNames[LayerType::POS_SIN]), 1, n_tokens, m_pos_dim, -1, errors); + checkShape(m_layerNames[LayerType::POS_COS], variant.getInput(m_layerNames[LayerType::POS_COS]), 1, n_tokens, m_pos_dim, -1, errors); + } else if (m_positional_encoding.type == PositionalEncoding::ABSOLUTE) { + checkShape(m_layerNames[LayerType::POS_IDS], variant.getInput(m_layerNames[LayerType::POS_IDS]), 1, 1, n_tokens, -1, errors); + } else if (m_positional_encoding.type == PositionalEncoding::ALIBI) { + checkShape(m_layerNames[LayerType::POS_IDS], variant.getInput(m_layerNames[LayerType::POS_IDS]), 1, n_tokens, m_ctx_size, -1, errors); + } + + if(m_modelArchitectureType != ModelArchitectureType::ENCODER) { + for (auto &[tname, tspec]: variant.input_specs) { + if (tname.starts_with("past_key")) + checkShape(tname, &tspec, -1, m_kv_dim, m_ctx_size - n_tokens, 1, errors); + else if (tname.starts_with("past_value")) + checkShape(tname, &tspec, -1, m_ctx_size - n_tokens, m_kv_dim, 1, errors); + } + + for (auto &[tname, tspec]: variant.output_specs) { + if (tname.starts_with("past_key")) + checkShape(tname, &tspec, -1, m_kv_dim, n_tokens, 1, errors); + else if (tname.starts_with("past_value")) + checkShape(tname, &tspec, -1, n_tokens, m_kv_dim, 1, errors); + } + } + } + + // skip check in case of BERT architecture since no KV cache tensors are existing + if(m_modelArchitectureType != ModelArchitectureType::ENCODER) { + // Check 4 - Quantization parameter match + std::unordered_map quant_params; + for (auto &variant: m_variant_list) { + for (auto &tensor_specs: {variant.input_specs, variant.output_specs}) { + for (auto &[tname, tspec]: tensor_specs) { + std::string name = (tname.starts_with("past_") && tname.ends_with("_in")) + ? tname.substr(0, tname.rfind("_")).append("_out") + : tname; + if (name.compare(m_layerNames[LayerType::OUTPUT]) == 0) continue; + if (quant_params.contains(name)) { + if (quant_params.at(name).scale != tspec.quantParam[0].scale || + quant_params.at(name).offset != tspec.quantParam[0].offset) + errors.push_back( + {variant.graph_name, + tname, + "Non-identical quantization parameters found for the same tensor"} + ); + } else + quant_params[tname] = {tspec.quantParam[0].scale, tspec.quantParam[0].offset}; + } + } + } + } + + if (errors.size() > 0) { + QNN_ERROR("Model Validation Errors found"); + for (auto& [graph_name, tensor_name, err_msg] : errors) // Log the list of errors + QNN_ERROR("%s : %s - %s", graph_name.c_str(), tensor_name.c_str(), err_msg.c_str()); + QNN_ERROR("Note: -1 means ignore (i.e. no comparison)"); + QNN_ERROR("Check model i/o specs (set dump-specs=true in config) for debugging"); + return false; + } + + return true; +} + +bool QnnNspModel::initializeKVManager() { + + if(m_disableKvCache){ + return true; + } + + // Pick the largest variant + int32_t variant = nsp_graph_count.rbegin()->first; + + int idx = 0; + for (auto& graph : m_nsp_graphs) { + auto& specs = (variant == m_ctx_size) ? graph[variant]->output_specs + : graph[variant]->input_specs; + + ThreadPool* _pool = _threaded ? 
&threadpool : nullptr; + // clang-format off + NewNSPKVManager *manager = new NewNSPKVManager( idx++, _env, _pool, m_ioTensor.get(), + specs, m_ctx_size, m_kv_dim, _kv_update_method); + // clang-format on + graph.registerKVManager(manager); + + if (_kv_update_method == POINTER_SHIFT) + graph.kvmanager->setTensorAllocInfo(&tensor_alloc_info); + } + + _kv_dispatcher = + std::unique_ptr(new KVDispatcher(_env, m_nsp_graphs, _threaded, _cpumask) + ); + _kv_update_count = _kv_dispatcher->dispatch(variant, 0); + + return true; +} + +inline bool QnnNspModel::updateTensorPointer( + GraphVariant& variant, + std::string& key, + QnnUtils::Tensor*& t +) { + QnnUtils::Tensor* tensor_ptr = variant.getInput(key); + if (tensor_ptr == nullptr) return true; + if (t == nullptr) t = tensor_ptr; + if (getBuffer(t) == getBuffer(tensor_ptr)) return true; + + __ERROR("{} has different addresses: {} vs {}", key, (void*)t, (void*)tensor_ptr); + return false; +} + +bool QnnNspModel::initializeTensorPointers() { + // Ideally this needs to be done for all sets of AR-n available, e.g. for AR-1 and AR-1024 + + bool status = true; + for (auto& variant : m_variant_list) { + status &= updateTensorPointer(variant, m_layerNames[LayerType::INPUT], t_input_ids); + status &= updateTensorPointer(variant, m_layerNames[LayerType::ATTN_MASK], t_attn_mask); + status &= updateTensorPointer(variant, m_layerNames[LayerType::POS_SIN], t_position_ids_sin); + status &= updateTensorPointer(variant, m_layerNames[LayerType::POS_COS], t_position_ids_cos); + status &= updateTensorPointer(variant, m_layerNames[LayerType::POS_IDS], t_position_ids); + status &= updateTensorPointer(variant, m_layerNames[LayerType::TOKEN_TYPE_IDS], t_token_type_ids); + } + if (!status) __ERROR("qnn-htp: Error in setting up named tensor pointers."); + + status &= !(!t_input_ids || !t_attn_mask); + if (!t_input_ids) __ERROR("Tensor not found: {}", m_layerNames[LayerType::INPUT]); + if (!t_attn_mask) __ERROR("Tensor not found: {}", m_layerNames[LayerType::ATTN_MASK]); + + if(m_modelArchitectureType == ModelArchitectureType::ENCODER){ // This input only valid for Encoder only model like bert. 
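+    // (Decoder-only models have no token_type_ids input, so this check is skipped.)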
+ status &= !(!t_token_type_ids); + if (!t_token_type_ids) __ERROR("Tensor not found: {}", m_layerNames[LayerType::TOKEN_TYPE_IDS]); + } + + if (m_positional_encoding.type == PositionalEncoding::ROPE) { + status &= !(!t_position_ids_sin || !t_position_ids_cos); + if (!t_position_ids_sin) __ERROR("Tensor not found: {}", m_layerNames[LayerType::POS_SIN]); + if (!t_position_ids_cos) __ERROR("Tensor not found: {}", m_layerNames[LayerType::POS_COS]); + } else if (m_positional_encoding.type == PositionalEncoding::ABSOLUTE) { + status &= !(!t_position_ids); + if (!t_position_ids) __ERROR("Tensor not found: {}", m_layerNames[LayerType::POS_IDS]); + } else if (m_positional_encoding.type == PositionalEncoding::ALIBI) { + status &= !(!t_position_ids); + if (!t_position_ids) __ERROR("Tensor not found: {}", m_layerNames[LayerType::POS_IDS]); + } else { + __ERROR("Unknown Rope Type found for tensor: {}", m_layerNames[LayerType::POS_IDS]); + } + + // Detect activation bitwidth + if (status) { + //Check Input-> Input_ID or Input_Embed + d_input = t_input_ids->dtype; + if (!supported_activations.contains(d_input)) { + __ERROR("Input Tensor: {} as unsupported activation type {}", m_layerNames[LayerType::INPUT], d_input.str()); + status = false; + } + // Check Attention Mask + d_attn_map = t_attn_mask->dtype; + if (!supported_activations.contains(d_attn_map)) { + __ERROR("attention_mask has unsupported type {}", d_attn_map.str()); + status = false; + } + // For Encoder only model, Check for Token_type_ids + if(m_modelArchitectureType == ModelArchitectureType::ENCODER) { + d_token_type = t_token_type_ids->dtype; + if (!supported_activations.contains(d_token_type)) { + __ERROR("token_type_ids has unsupported type {}", d_token_type.str()); + status = false; + } + } + + //For Position_IDs check data bitWidth + if (m_positional_encoding.type == PositionalEncoding::ROPE) + d_pos = t_position_ids_sin->dtype; + else if (m_positional_encoding.type == PositionalEncoding::ABSOLUTE) + d_pos = t_position_ids->dtype; + else if (m_positional_encoding.type == PositionalEncoding::ALIBI) + d_pos = t_position_ids->dtype; + + if (((m_positional_encoding.type == PositionalEncoding::ABSOLUTE || + m_positional_encoding.type == PositionalEncoding::ALIBI) && + d_pos != QNN_DATATYPE_INT_32) || + (m_positional_encoding.type == PositionalEncoding::ROPE && + !supported_activations.contains(d_pos))) { + __ERROR("position encoding tensor has unsupported type {}", d_pos.str()); + status = false; + } + __DEBUG("qnn-htp datatypes: d_input {} d_attn_map {} d_pos {} d_kv {}", + d_input.str(), + d_attn_map.str(), + d_pos.str(), + d_kv.str()); + + if (!status) __ERROR("Only 8-bit, 16-bit and 32-bit activations are supported"); + } + + return status; +} +bool QnnNspModel::setupAttentionMaskFP16(bool pad_left, + int n_tokens, + int n_inputs, + int n_past, + std::span attention_map, + size_t n_skip_prefix, + size_t n_apply_prefix_offset) { + QnnUtils::Dims t_attn_mask_dims = t_attn_mask->dims; + size_t numElements = t_attn_mask_dims.getNumElements(); + size_t bufSize = numElements * 2; // (bitwidth = 16, in bytes: 16/8) + std::vector attn_mask_vec(bufSize); + if (!float32ToFloat16((unsigned char *)attn_mask_vec.data(), (float *) getBuffer(t_attn_mask), numElements)) { + QNN_ERROR("Number of elements is 0"); + return false; + } + // Setup attention mask + { + uint16_t* attn_buffer = (uint16_t*)attn_mask_vec.data(); + const int n_valid = n_past + n_inputs; + + uint16_t pos_val = -1, neg_val = 0; + pos_val = 0; + neg_val = -1000; + + // Clear the 
attention mask + std::fill_n(attn_buffer, n_tokens * m_ctx_size, neg_val); + if (attention_map.empty()) { + uint16_t* cur_ptr = &attn_buffer + [(pad_left) ? (m_ctx_size - n_valid) * (m_ctx_size + 1) + : m_ctx_size - n_past - n_tokens]; + for (int n_masked = n_past + 1; n_masked <= n_valid; n_masked++) { + std::fill_n(cur_ptr, n_masked, pos_val); + cur_ptr += m_ctx_size; + } + } else if (attention_map.size() == n_inputs) { + // Only fill in n_inputs. Rest will be padding + const size_t attn_row_start = m_ctx_size - n_past - n_tokens; + for (int i = 0; i < n_inputs; i++) { + uint16_t* cur_ptr = &attn_buffer[i * m_ctx_size + attn_row_start]; + + cur_ptr[n_past + i] = pos_val; // Attend to itself + if (attention_map[i] < 0) { // If negative, attend to only past tokens + int32_t n_masked = n_past + attention_map[i] + 1; + if (i < n_apply_prefix_offset) { // Skip prefix is needed + cur_ptr += n_skip_prefix; + n_masked -= n_skip_prefix; + } + std::fill_n(cur_ptr, n_masked, pos_val); + + } else { // If positive, copy attention map from (relative to 0th input) parent + const int32_t pidx = attention_map[i]; // Parent token index + uint16_t* parent_ptr = &attn_buffer[pidx * m_ctx_size + attn_row_start]; + std::memcpy(cur_ptr, parent_ptr, (n_past + pidx + 1) * sizeof(uint16_t)); + + // If parent skipped prefix, but this token needs to attend to prefix, add attn + if (i >= n_apply_prefix_offset && pidx < n_apply_prefix_offset) + std::fill_n(cur_ptr, n_skip_prefix, pos_val); + } + } + } else if (attention_map.size() == n_valid * n_inputs) { + uint16_t* cur_ptr = &attn_buffer[m_ctx_size - n_past - n_tokens]; + for (int i = 0; i < n_inputs; i++) { + for (int j = 0; j < n_valid; j++) + cur_ptr[j] = (attention_map[i * n_valid + j] == 0) ? neg_val : pos_val; + cur_ptr += m_ctx_size; + } + } + } + + return true; + +} +template +bool QnnNspModel::setupAttentionMask( + bool pad_left, + int n_tokens, + int n_inputs, + int n_past, + std::span attention_map, + size_t n_skip_prefix, + size_t n_apply_prefix_offset +) { + // Setup attention mask + { + DType* attn_buffer = (DType*)getBuffer(t_attn_mask); + const int n_valid = n_past + n_inputs; + + DType pos_val = -1, neg_val = 0; + + if(m_modelArchitectureType == ModelArchitectureType::ENCODER){ + pos_val = 1; // BGE model is using 1 to set attention mask and 0 to unset. + std::memset(attn_buffer, neg_val, 1 * m_ctx_size * sizeof(DType)); + size_t in_buf_offset = pad_left ? m_ctx_size - n_valid : 0; + DType* cur_ptr = &attn_buffer[in_buf_offset]; + std::fill_n(cur_ptr, n_valid, pos_val); + } + else { + // Clear the attention mask + std::fill_n(attn_buffer, n_tokens * m_ctx_size, neg_val); + if (attention_map.empty()) { + DType *cur_ptr = &attn_buffer + [(pad_left) ? (m_ctx_size - n_valid) * (m_ctx_size + 1) + : m_ctx_size - n_past - n_tokens]; + for (int n_masked = n_past + 1; n_masked <= n_valid; n_masked++) { + std::fill_n(cur_ptr, n_masked, pos_val); + cur_ptr += m_ctx_size; + } + } else if (attention_map.size() == n_inputs) { + // Only fill in n_inputs. 
Rest will be padding + const size_t attn_row_start = m_ctx_size - n_past - n_tokens; + for (int i = 0; i < n_inputs; i++) { + DType *cur_ptr = &attn_buffer[i * m_ctx_size + attn_row_start]; + + cur_ptr[n_past + i] = pos_val; // Attend to itself + if (attention_map[i] < 0) { // If negative, attend to only past tokens + int32_t n_masked = n_past + attention_map[i] + 1; + if (i < n_apply_prefix_offset) { // Skip prefix is needed + cur_ptr += n_skip_prefix; + n_masked -= n_skip_prefix; + } + std::fill_n(cur_ptr, n_masked, pos_val); + + } else { // If positive, copy attention map from (relative to 0th input) parent + const int32_t pidx = attention_map[i]; // Parent token index + DType *parent_ptr = &attn_buffer[pidx * m_ctx_size + attn_row_start]; + std::memcpy(cur_ptr, parent_ptr, (n_past + pidx + 1) * sizeof(DType)); + + // If parent skipped prefix, but this token needs to attend to prefix, add attn + if (i >= n_apply_prefix_offset && pidx < n_apply_prefix_offset) + std::fill_n(cur_ptr, n_skip_prefix, pos_val); + } + } + } else if (attention_map.size() == n_valid * n_inputs) { + DType *cur_ptr = &attn_buffer[m_ctx_size - n_past - n_tokens]; + for (int i = 0; i < n_inputs; i++) { + for (int j = 0; j < n_valid; j++) + cur_ptr[j] = (attention_map[i * n_valid + j] == 0) ? neg_val : pos_val; + cur_ptr += m_ctx_size; + } + } + } + } + + return true; +} + bool QnnNspModel::setupRopePositionEmbeddingFP16( + bool pad_left, + int n_tokens, + int n_inputs, + int n_past, + std::span attention_map, + size_t n_skip_prefix, + size_t n_apply_prefix_offset + ) { + const int n_valid = n_past + n_inputs; + + // Cast RoPE embeddings to proper dtype + // The following two buffers are already converted to fp16 + uint16_t* typed_rope_sin = (uint16_t*)rope_sin; + uint16_t* typed_rope_cos = (uint16_t*)rope_cos; + + // These two need conversion + + QnnUtils::Dims t_position_ids_cos_dims = t_position_ids_cos->dims; + size_t numElements = t_position_ids_cos_dims.getNumElements(); + size_t bufSize = numElements * 2; // (bitwidth = 16, in bytes: 16/8) + std::vector position_ids_cos_vec(bufSize); + if (!float32ToFloat16((unsigned char *)position_ids_cos_vec.data(), (float *) getBuffer(t_position_ids_cos), numElements)) { + QNN_ERROR("Number of elements is 0"); + return false; + } + uint16_t* cos_buffer = (uint16_t*)position_ids_cos_vec.data(); + + QnnUtils::Dims t_position_ids_sin_dims = t_position_ids_sin->dims; + numElements = t_position_ids_sin_dims.getNumElements(); + bufSize = numElements * 2; // (bitwidth = 16, in bytes: 16/8) + std::vector position_ids_sin_vec(bufSize); + if (!float32ToFloat16((unsigned char *)position_ids_sin_vec.data(), (float *) getBuffer(t_position_ids_sin), numElements)) { + QNN_ERROR("Number of elements is 0"); + return false; + } + uint16_t* sin_buffer = (uint16_t*)position_ids_sin_vec.data(); + + // Clear out all position_ids as position_sin/cos[0] + const size_t pos_row_size = m_pos_dim * sizeof(uint16_t); + for (int i = 0; i < n_tokens; i++) { + std::memcpy(&sin_buffer[i * m_pos_dim], typed_rope_sin, pos_row_size); + std::memcpy(&cos_buffer[i * m_pos_dim], typed_rope_cos, pos_row_size); + } + + // Copy in position embeddings [0:(n_valid-1)] to input sin/cos buffer + const size_t pos_buf_offset = m_pos_dim * ((pad_left) ? 
m_ctx_size - n_valid : 0); + if (attention_map.size() == n_inputs) { + // Copy embeddings one by one based on the attention map + std::vector pos_ids(n_inputs, 0); + auto sin = &sin_buffer[pos_buf_offset]; + auto cos = &cos_buffer[pos_buf_offset]; + + // 1st token + pos_ids[0] = m_nPast - n_skip_prefix; + std::memcpy(sin, &typed_rope_sin[pos_ids[0] * m_pos_dim], pos_row_size); + std::memcpy(cos, &typed_rope_cos[pos_ids[0] * m_pos_dim], pos_row_size); + sin += m_pos_dim; + cos += m_pos_dim; + + // Rest + for (int i = 1; i < n_inputs; i++) { + auto parent_index = attention_map[i]; + pos_ids[i] = pos_ids[parent_index] + 1; + std::memcpy(sin, &typed_rope_sin[pos_ids[i] * m_pos_dim], pos_row_size); + std::memcpy(cos, &typed_rope_cos[pos_ids[i] * m_pos_dim], pos_row_size); + sin += m_pos_dim; + cos += m_pos_dim; + } + } else if (attention_map.size() == (n_past + n_inputs) * n_inputs) { + // For now, simply have the same position ID across the variant + auto sin = &sin_buffer[0]; + auto cos = &cos_buffer[0]; + + // Calculate position based on number of items this index is attending to + for (int i = 0; i < n_inputs; i++) { + auto attn_row = attention_map.subspan(i * n_valid, n_valid); + int32_t pos_id = + std::accumulate(attn_row.begin() + n_skip_prefix, attn_row.end(), 0) - attn_row[n_past + i]; + + // __DEBUG("PositionID [ i={}, n_past={}, pos_id={} ]", i, n_past, pos_id); + + std::memcpy(sin, &typed_rope_sin[pos_id * m_pos_dim], pos_row_size); + std::memcpy(cos, &typed_rope_cos[pos_id * m_pos_dim], pos_row_size); + sin += m_pos_dim; + cos += m_pos_dim; + } + } else { + const size_t pos_dat_offset = m_pos_dim * (n_past - n_skip_prefix); + const size_t pos_cpy_amt = pos_row_size * ((pad_left) ? n_valid : n_tokens); + std::memcpy(&sin_buffer[pos_buf_offset], &typed_rope_sin[pos_dat_offset], pos_cpy_amt); + std::memcpy(&cos_buffer[pos_buf_offset], &typed_rope_cos[pos_dat_offset], pos_cpy_amt); + } + + return true; + } +template +bool QnnNspModel::setupRopePositionEmbedding( + bool pad_left, + int n_tokens, + int n_inputs, + int n_past, + std::span attention_map, + size_t n_skip_prefix, + size_t n_apply_prefix_offset +) { + + const int n_valid = n_past + n_inputs; + + // Cast RoPE embeddings to proper dtype + DType* typed_rope_sin = (DType*)rope_sin; + DType* typed_rope_cos = (DType*)rope_cos; + + DType* cos_buffer = (DType*)getBuffer(t_position_ids_cos); + DType* sin_buffer = (DType*)getBuffer(t_position_ids_sin); + + // Clear out all position_ids as position_sin/cos[0] + const size_t pos_row_size = m_pos_dim * sizeof(DType); + for (int i = 0; i < n_tokens; i++) { + std::memcpy(&sin_buffer[i * m_pos_dim], typed_rope_sin, pos_row_size); + std::memcpy(&cos_buffer[i * m_pos_dim], typed_rope_cos, pos_row_size); + } + + // Copy in position embeddings [0:(n_valid-1)] to input sin/cos buffer + const size_t pos_buf_offset = m_pos_dim * ((pad_left) ? 
m_ctx_size - n_valid : 0); + if (attention_map.size() == n_inputs) { + // Copy embeddings one by one based on the attention map + std::vector pos_ids(n_inputs, 0); + auto sin = &sin_buffer[pos_buf_offset]; + auto cos = &cos_buffer[pos_buf_offset]; + + // 1st token + pos_ids[0] = m_nPast - n_skip_prefix; + std::memcpy(sin, &typed_rope_sin[pos_ids[0] * m_pos_dim], pos_row_size); + std::memcpy(cos, &typed_rope_cos[pos_ids[0] * m_pos_dim], pos_row_size); + sin += m_pos_dim; + cos += m_pos_dim; + + // Rest + for (int i = 1; i < n_inputs; i++) { + auto parent_index = attention_map[i]; + pos_ids[i] = pos_ids[parent_index] + 1; + std::memcpy(sin, &typed_rope_sin[pos_ids[i] * m_pos_dim], pos_row_size); + std::memcpy(cos, &typed_rope_cos[pos_ids[i] * m_pos_dim], pos_row_size); + sin += m_pos_dim; + cos += m_pos_dim; + } + } else if (attention_map.size() == (n_past + n_inputs) * n_inputs) { + // For now, simply have the same position ID across the variant + auto sin = &sin_buffer[0]; + auto cos = &cos_buffer[0]; + + // Calculate position based on number of items this index is attending to + for (int i = 0; i < n_inputs; i++) { + auto attn_row = attention_map.subspan(i * n_valid, n_valid); + int32_t pos_id = + std::accumulate(attn_row.begin() + n_skip_prefix, attn_row.end(), 0) - attn_row[n_past + i]; + + // __DEBUG("PositionID [ i={}, n_past={}, pos_id={} ]", i, n_past, pos_id); + + std::memcpy(sin, &typed_rope_sin[pos_id * m_pos_dim], pos_row_size); + std::memcpy(cos, &typed_rope_cos[pos_id * m_pos_dim], pos_row_size); + sin += m_pos_dim; + cos += m_pos_dim; + } + } else { + const size_t pos_dat_offset = m_pos_dim * (n_past - n_skip_prefix); + const size_t pos_cpy_amt = pos_row_size * ((pad_left) ? n_valid : n_tokens); + std::memcpy(&sin_buffer[pos_buf_offset], &typed_rope_sin[pos_dat_offset], pos_cpy_amt); + std::memcpy(&cos_buffer[pos_buf_offset], &typed_rope_cos[pos_dat_offset], pos_cpy_amt); + } + + return true; +} + +template +bool QnnNspModel::setupAlibiPositionEmbedding( + bool pad_left, + int n_tokens, + int n_inputs, + int n_past +) { + DType* alibi_buffer = (DType*)getBuffer(t_position_ids); + + const int n_valid = n_past + n_inputs; + const DType pad_val = m_ctx_size; + + // Clear alibi buffer + std::fill_n(alibi_buffer, n_tokens * m_ctx_size, pad_val); + + // Detect start of past tokens and new tokens based on m_ctx_size and n_tokens (variant) + DType* alibi_past = alibi_buffer; // [0, m_ctx_size-n_tokens) + DType* alibi_new = alibi_buffer + m_ctx_size - n_tokens; // [m_ctx_size-n_tokens, m_ctx_size) + + // For non SMART_MASK, past tokens/KV$ is left-padded and past ptr needs to be offset by padding + alibi_past += m_ctx_size - n_tokens - n_past; + + // For left padded inputs, new pointer needs to be offset by n_tokens - n_inputs + if (pad_left) { + alibi_new += n_tokens - n_inputs; + alibi_past += (n_tokens - n_inputs) * m_ctx_size; + alibi_new += (n_tokens - n_inputs) * m_ctx_size; + } + + // Fill alibi positions from [-n_past-i, -i) and [-i, 0] + for (int i = 0; i < n_inputs; i++) { + std::iota( + std::reverse_iterator(alibi_past + n_past), + std::reverse_iterator(alibi_past), + i + 1 + ); // Fill past tokens + std::iota( + std::reverse_iterator(alibi_new + i + 1), + std::reverse_iterator(alibi_new), + 0 + ); // Fill new tokens + + alibi_past += m_ctx_size; // Update pointers to next row + alibi_new += m_ctx_size; + } + + return true; +} + +bool QnnNspModel::setupInputTensors( + std::span tokens, + int32_t n_past, + std::span attention_map, + size_t n_skip_prefix, + size_t 
n_apply_prefix_offset +) { + qualla::Timer start; + + const int n_tokens = run_info.n_tokens; + const int n_inputs = run_info.n_processed; + const int32_t n_valid = n_past + n_inputs; + __TRACE("qnn-htp: setup-input-tensors with {} tokens for AR-{}", n_inputs, n_tokens); + + const bool pad_left = (n_tokens == m_ctx_size); + if (n_inputs > n_tokens) { + __ERROR("qnn-htp: setup-input-tensors too many tokens: {} on AR-{}", n_inputs, n_tokens); + return false; + } + + // Setup input id tensor + { + uint32_t* input_id_buffer = (uint32_t*)getBuffer(t_input_ids); + std::fill_n(input_id_buffer, n_tokens, static_cast(m_pad_token)); + + size_t in_buf_offset = pad_left ? n_tokens - n_inputs : 0; + std::memcpy(&input_id_buffer[in_buf_offset], tokens.data(), n_inputs * sizeof(uint32_t)); + } + + // clang-format off + switch (d_attn_map) { + case QNN_DATATYPE_UFIXED_POINT_8: + setupAttentionMask(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_UFIXED_POINT_16: + setupAttentionMask(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_INT_32: + setupAttentionMask(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_FLOAT_16: { + setupAttentionMaskFP16(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, + n_apply_prefix_offset); + break; + } + default: __ERROR("Unsupported attention mask dtype {}", d_attn_map.str()); return false; + } + // clang-format on + + // Setup token type IDs + if(m_modelArchitectureType == ModelArchitectureType::ENCODER) { + //BERT Specific + uint32_t *token_type_id_buffer = (uint32_t *) getBuffer(t_token_type_ids); + std::memset(token_type_id_buffer, 0, n_tokens * sizeof(uint32_t)); + } + + // Setup position IDs + if (m_positional_encoding.type == PositionalEncoding::ROPE) { + // clang-format off + switch (d_pos) { + case QNN_DATATYPE_UFIXED_POINT_8: + setupRopePositionEmbedding(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_UFIXED_POINT_16: + setupRopePositionEmbedding(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_FLOAT_16: + setupRopePositionEmbeddingFP16(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + default: __ERROR("Unsupported rope position dtype {}", d_pos.str()); return false; + } + // clang-format on + } else if (m_positional_encoding.type == PositionalEncoding::ABSOLUTE) { + uint32_t* position_id_buffer = (uint32_t*)getBuffer(t_position_ids); + std::memset(position_id_buffer, 0, n_tokens * sizeof(uint32_t)); + + // Fill up position_ids buffer + uint32_t* pos_id_start = &position_id_buffer[pad_left ? 
n_tokens - n_inputs : 0]; + uint32_t* pos_id_end = pos_id_start + n_inputs; + std::iota(pos_id_start, pos_id_end, n_past); + } else if (m_positional_encoding.type == PositionalEncoding::ALIBI) { + setupAlibiPositionEmbedding(pad_left, n_tokens, n_inputs, n_past); + } + + __TRACE("qnn-htp: setup-input-tensors complete : {} usec", start.elapsed_usec()); + return true; +} + + +bool QnnNspModel::setupInputTensors( + std::span embedding, + int32_t n_past, + std::span attention_map, + size_t n_skip_prefix, + size_t n_apply_prefix_offset +) { + qualla::Timer start; + + const int n_tokens = run_info.n_tokens; + const int n_inputs = run_info.n_processed; + const int32_t n_valid = n_past + n_inputs; + __TRACE("qnn-htp: setup-input-tensors with {} tokens for AR-{}", n_inputs, n_tokens); + + const bool pad_left = (n_tokens == m_ctx_size); + if (n_inputs > n_tokens) { + __ERROR("qnn-htp: setup-input-tensors too many tokens: {} on AR-{}", n_inputs, n_tokens); + return false; + } + + // Setup input embeds tensor + { + // Quantize and fill, don't make double copy + size_t in_buf_offset = pad_left ? n_tokens - n_inputs : 0; + size_t startIdx = pad_left ? 0 : n_inputs; + size_t endIdx = pad_left ? in_buf_offset : n_tokens; + + if (embedding_datatype == "float32") { + // First flush the buffer with eos token embedding + for (size_t i = startIdx; i < endIdx; i++) { + quantizeInput((float*)m_eosEmbedding.data(), i*m_embd_size, m_embd_size); + } + + // Quantize the data input vector + quantizeInput((float*)embedding.data(), in_buf_offset*m_embd_size, n_inputs * m_embd_size); + } else if (embedding_datatype == "native") { + // Size of the buffer for one embedding vector. + const size_t embedBufSize = m_embeddingBufferSize; + // First flush the buffer with eos token embedding + uint8_t* embeddingSrc = static_cast(m_eosEmbedding.data()); + for (size_t i = startIdx; i < endIdx; i++) { + std::copy(embeddingSrc, embeddingSrc + embedBufSize, (uint8_t*)getBuffer(t_input_ids) + i*embedBufSize); + } + + // Copy the data input vector + embeddingSrc = static_cast(embedding.data()); + std::copy(embeddingSrc, embeddingSrc + embedding.size(), (uint8_t*)getBuffer(t_input_ids) + in_buf_offset*embedBufSize); + } + } + + // Don't modify attention mask it should work out of the box + // clang-format off + switch (d_attn_map) { + case QNN_DATATYPE_UFIXED_POINT_8: + setupAttentionMask(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_UFIXED_POINT_16: + setupAttentionMask(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_INT_32: + setupAttentionMask(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_FLOAT_16: { + setupAttentionMaskFP16(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, + n_apply_prefix_offset); + break; + } + default: __ERROR("Unsupported attention mask dtype {}", d_attn_map.str()); return false; + } + // clang-format on + + // Setup token type IDs // Will not be + if(m_modelArchitectureType == ModelArchitectureType::ENCODER) { + //BERT Specific + uint32_t *token_type_id_buffer = (uint32_t *) getBuffer(t_token_type_ids); + std::memset(token_type_id_buffer, 0, n_tokens * sizeof(uint32_t)); + } + + // Setup position IDs + if (m_positional_encoding.type == PositionalEncoding::ROPE) { + // clang-format off + switch (d_pos) { + case QNN_DATATYPE_UFIXED_POINT_8: + setupRopePositionEmbedding(pad_left, 
n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_UFIXED_POINT_16: + setupRopePositionEmbedding(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + case QNN_DATATYPE_FLOAT_16: + setupRopePositionEmbeddingFP16(pad_left, n_tokens, n_inputs, n_past, attention_map, n_skip_prefix, n_apply_prefix_offset); break; + default: __ERROR("Unsupported rope position dtype {}", d_pos.str()); return false; + } + // clang-format on + } else if (m_positional_encoding.type == PositionalEncoding::ABSOLUTE) { + uint32_t* position_id_buffer = (uint32_t*)getBuffer(t_position_ids); + std::memset(position_id_buffer, 0, n_tokens * sizeof(uint32_t)); + + // Fill up position_ids buffer + uint32_t* pos_id_start = &position_id_buffer[pad_left ? n_tokens - n_inputs : 0]; + uint32_t* pos_id_end = pos_id_start + n_inputs; + std::iota(pos_id_start, pos_id_end, n_past); + } + + __TRACE("qnn-htp: setup-input-tensors complete : {} usec", start.elapsed_usec()); + return true; +} + +bool QnnNspModel::runInferenceHelper(bool pipeline, int32_t* total_wait, int32_t* total_exec) { + // run_info is set in runInference + int32_t idx = 0; + int32_t wait_kv_update_count = _kv_update_count; + + auto [variant, n_processed, tokens] = run_info; // based on type one of the embedding and token vector will be empty. + for (auto& nsp_graph : m_nsp_graphs) { + //__DEBUG("execute({}, {}, {})", variant, m_inference_count, wait_kv_update_count); + if (!nsp_graph.execute(variant, m_inference_count, wait_kv_update_count)) return false; + auto [cur_wait, cur_exec] = nsp_graph.getExecutionStats(); + + // If we are pipelining execution with KV$Update, dispatch KV$ update jobs + if (pipeline) { + qualla::Timer timer; + + int32_t n_past = static_cast(m_nPast + n_processed); + if(!m_disableKvCache) + _kv_update_count = _kv_dispatcher->dispatch(idx, variant, n_past); + cur_wait += timer.elapsed_usec(); + } + + *total_exec += cur_exec; + *total_wait += cur_wait; + idx++; + } + + if (pipeline) { + if(m_inputType == InputType::TOKENS) // used tokens for processing, save them + token_history.insert(token_history.end(), &tokens[0], &tokens[n_processed]); + else if(m_inputType == InputType::UNKNOWN) + { + __ERROR("Unknown input type found"); + return false; + } + m_nPast += n_processed; + } + + if (_debug_outputs){ + if(m_modelArchitectureType == ModelArchitectureType::ENCODER){ + if(!debugOutputs(m_nsp_graphs.back().variants[run_info.n_tokens]->getOutput(m_layerNames[LayerType::POOL_OUTPUT]), m_layerNames[LayerType::POOL_OUTPUT])){ + __DEBUG("qnn-htp : Failed to save {} tensor", m_layerNames[LayerType::POOL_OUTPUT]); + } + if(!debugOutputs(m_nsp_graphs.back().variants[run_info.n_tokens]->getOutput(m_layerNames[LayerType::SEQ_OUTPUT]), m_layerNames[LayerType::SEQ_OUTPUT])){ + __DEBUG("qnn-htp : Failed to save {} tensor", m_layerNames[LayerType::SEQ_OUTPUT]); + } + } + else { + if(!debugOutputs(m_nsp_graphs.back().variants[variant]->getOutput(m_layerNames[LayerType::OUTPUT]), m_layerNames[LayerType::OUTPUT])) { + __DEBUG("qnn-htp : Failed to save {} tensor", m_layerNames[LayerType::OUTPUT]); + } + } + } + + m_inference_count++; + return true; +} + +bool QnnNspModel::debugOutputs(QnnUtils::Tensor* outTensor, std::string& outTensorName){ + + if(outTensor == NULL){ + __DEBUG("qnn-htp : Encountered NULL Tensor"); + return false; + } + + auto [variant, n_processed, tokens] = run_info; + + int output_bw = outTensor->dtype.bw(); // Detect 8-bit vs 16-bit logits + 
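+  // Illustrative layout note (the numbers are an example, not from the source):
+  // on the full-context variant (variant == m_ctx_size, left-padded inputs) with
+  // m_ctx_size = 2048 and n_processed = 3, the valid output rows are [2045, 2048),
+  // so `offset` below evaluates to 2045; smaller variants are right-padded and
+  // keep their valid rows starting at offset 0.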
uint8_t *output_buffer = (uint8_t *) getBuffer(outTensor); + + int32_t offset = (variant == m_ctx_size) ? (m_ctx_size - n_processed) : 0; + int32_t bufsize = 0; + if(m_modelArchitectureType == ModelArchitectureType::ENCODER){ + bufsize = m_ctx_size * m_embd_size * output_bw; // ctx * embed_size * output_bitwidth + // Bert is saving complete out buffer as it is. + } + else{ + // Reducing buffer to number of processed tokens and each token is of vocab_size + bufsize = n_processed * m_vocab_size * output_bw; // processed_token * vocab_size * output_bitwidth + output_buffer += offset * m_vocab_size * output_bw; // shift output buffer to offset * vocab_size * output_bitwidth + } + + std::string fname = fmt::format("{}/{}/{:03d}", _debug_path, outTensorName, m_inference_count); + QnnUtils::writeRawData(output_buffer, bufsize, fname); + return true; + +} + +int32_t QnnNspModel::selectVariantStrategy(int32_t n_inputs, int32_t n_past, int32_t cur_variant) { + int32_t best_variant = cur_variant; + int32_t best_cost = INT32_MAX; + int32_t switch_cost = 10; // Currently hard-coded to 10ms + + for (auto [variant, latency] : variant_latency) { + // If variant cannot support the n_past, it is a non-starter + // e.g. AR-128 with ctx_size=1024 can only support upto n_past=896 since it uses 128 output + if (n_past + n_inputs > m_ctx_size) continue; + + const int32_t n_iters = 1 + ((n_inputs - 1) / variant); + const int32_t cost = latency * n_iters + ((variant == cur_variant) ? 0 : switch_cost); + if (cost < best_cost) { + best_variant = variant; + best_cost = cost; + } + } + + __DEBUG("qnn-htp : Variant selected AR={} (~ {} ms)", best_variant, best_cost); + return best_variant; +} + +size_t QnnNspModel::runInference( + const std::vector& in_tokens, + const std::vector& attention_map, + std::vector& output, + bool output_all +) { + qualla::Timer start; + + __TRACE("runInference logits_all={} in_tokens={}", output_all, in_tokens); + + if(m_inputType != InputType::TOKENS) { + throw std::runtime_error("Wrong Type of input is supplied for token type query."); + } + + if (in_tokens.size() == 0) return 0; + + // Select variant based on variant_latency, or default to current variant + std::vector tokens(in_tokens); + if (!variant_latency.empty() && !m_disableKvCache) { + const int32_t cur_variant = _kv_dispatcher->getCurVariant(); + const int32_t new_variant = selectVariantStrategy(tokens.size(), m_nPast, cur_variant); + if (cur_variant != new_variant) // Switch variant if necessary + _kv_update_count = _kv_dispatcher->dispatch(new_variant, m_nPast); + } + + // If variant selected in BERT-Mode, append token history to current request + int32_t variant = 0; + if(!m_disableKvCache) + variant = _kv_dispatcher->getCurVariant(); + else + variant = nsp_graph_count.rbegin()->first; // pick largest variant + if (variant == m_ctx_size && m_nPast != 0) + tokens.insert(tokens.begin(), token_history.begin(), token_history.end()); + + const int32_t n_inputs = static_cast(tokens.size()); + const int32_t n_past = static_cast(m_nPast); + const int32_t n_valid = n_past + n_inputs; + run_info.n_tokens = variant; + if (variant != m_ctx_size && m_nPast + variant > m_ctx_size) { + __ERROR("qnn-htp: exceeding ctx_size! 
: {} + {} > {}", m_nPast, variant, m_ctx_size); + return 0; + } + + // Calculate number of batches for run-inference + const int32_t num_iters = 1 + ((n_inputs - 1) / variant); + __DEBUG("qnn-htp: run-inference : {} tokens (AR-{} * {} iters)", n_inputs, variant, num_iters); + + // Validate attention_map size + if (!attention_map.empty() && attention_map.size() != n_inputs && + attention_map.size() != n_inputs * (n_past + n_inputs)) { + // clang-format off + __ERROR("qnn-htp: attention_map must be 1D(n_inputs) or 2D(n_inputs * (n_past + n_inputs))" + "but has size={} for n_past={} n_inputs={}", attention_map.size(), n_past, n_inputs); + // clang-format on + return 0; + } + std::vector chunked_attn_map; + + // Technical note: int32_t can hold upto 596 hours + // Even int16_t should be sufficient here - it holds upto 32.8 seconds + int32_t total_wait = 0; + int32_t total_exec = 0; + + // user choice overwrites the default behaviour in case of Embedding models + if(m_modelArchitectureType == ModelArchitectureType::ENCODER) + output_all = !m_pooled_output; + + // Reset logit accumulator + size_t output_count = output_all ? n_inputs : 1; // actual number of logits + + if(m_modelArchitectureType == ModelArchitectureType::ENCODER) + output.resize(output_count * m_embd_size); + else + output.resize(output_count * m_vocab_size); + + for (int i = 0; i < num_iters; i++) { + const int32_t update_size = std::min(variant, n_inputs - i * variant); + run_info.n_processed = update_size; + run_info.tokens.assign(&tokens[i * variant], &tokens[i * variant + update_size]); + + int32_t n_skip_prefix = + (i * variant < _offset_to_apply_kv_prefix) ? _size_to_skip_kv_prefix : 0; + int32_t n_apply_prefix_offset = 0; + if (i * variant < _offset_to_apply_kv_prefix) + n_apply_prefix_offset = std::min(variant, _offset_to_apply_kv_prefix - i * variant); + + // Chunk inputs and attention mask + std::span tokens_chunk = std::span{tokens.data(),tokens.size()}.subspan(i * variant, update_size); + std::span attn_map_chunk = std::span(); + if (attention_map.size() == n_inputs) { + chunked_attn_map.resize(update_size); + // Take exactly update_size elements. Be mindful to decrease offset already processed + for (int j = 0; j < update_size; j++) + chunked_attn_map[j] = attention_map[i * variant + j] - (i * variant); + attn_map_chunk = std::span{chunked_attn_map.data(),chunked_attn_map.size()}; + } else if (attention_map.size() == n_inputs * (n_past + n_inputs)) { + chunked_attn_map.clear(); + chunked_attn_map.resize(update_size * (m_nPast + update_size)); + + for (int j = 0; j < update_size; j++) { + // Be mindful. m_nPast changes each iteration. + // n_tokens is total #tokens called. update_size is the n_tokens for this iteration + // n_past is the initial m_nPast. n_valid = n_past + n_tokens + std::memcpy( + &chunked_attn_map[j * (m_nPast + update_size)], + &attention_map[i * variant * n_valid + j * n_valid], + (m_nPast + update_size) * sizeof(int32_t) + ); + } + attn_map_chunk = std::span{chunked_attn_map.data(),chunked_attn_map.size()}; + } + + if (!setupInputTensors( + tokens_chunk, + (variant == m_ctx_size) ? 
0 : m_nPast, + attn_map_chunk, + n_skip_prefix, + n_apply_prefix_offset + )) + return 0; + + // Run Inference and pipeline KV$ update iff n_inputs is exactly 1 or we have more batches + bool pipeline = (n_inputs == 1 || i < num_iters - 1); + if (!runInferenceHelper(pipeline, &total_wait, &total_exec)) return 0; + + if (m_modelArchitectureType != ModelArchitectureType::ENCODER && output_all) { + // Accumulate logits + const size_t logit_offset = i * variant * m_vocab_size; + const size_t logit_count = update_size * m_vocab_size; + getDequantLogits(std::span{output.data(), output.size()}.subspan(logit_offset, logit_count), + output_all); + } + } + + // Return last logit if not accumulating + if(m_modelArchitectureType != ModelArchitectureType::ENCODER) { + if(!output_all) + getDequantLogits(std::span{output.data(), output.size()}, output_all); + } + else + getEmbeddings(std::span{output.data(), output.size()}); + + __DEBUG("qnn-htp: run-inference complete : {} usec : wait {} exec {}", + start.elapsed_usec(), + total_wait, + total_exec); + + // threadpool.suspend(); + return output_count; +} + +bool QnnNspModel::quantizeInput(float* in, size_t tensorOffset ,size_t length) { + + if(t_input_ids == nullptr) { + __ERROR("Input Tensor {} not found during execute", m_layerNames[LayerType::INPUT]); + return false; + } + + const auto scale = t_input_ids->quantParam[0].scale; + const auto offset = t_input_ids->quantParam[0].offset; + + // clang-format off + switch (t_input_ids->dtype) { + case QNN_DATATYPE_UFIXED_POINT_8: QnnUtils::quantizeTensorPtr(in, (uint8_t*)getBuffer(t_input_ids) + tensorOffset, offset, scale, length); break; + case QNN_DATATYPE_UFIXED_POINT_16: QnnUtils::quantizeTensorPtr(in, (uint16_t*)getBuffer(t_input_ids) + tensorOffset, offset, scale, length); break; + default: __ERROR("Unsupported alpha tensor dtype {}", t_input_ids->dtype.str()); return false; + } + + return true; +} + +size_t QnnNspModel::getEmbeddingBufferSize() { + return m_embeddingBufferSize; +} + +size_t QnnNspModel::runInference( + std::vector& embedding, + const std::vector& attention_map, + std::vector& output, + bool output_all +) { + qualla::Timer start; + + __DEBUG("qnn-htp: run-inference start : n_Embd {}", embedding.size()); + + if(m_inputType != InputType::EMBEDDINGS) { + throw std::runtime_error("Embedding input type is not supported by the model."); + } + + if (embedding.size() == 0) return true; + + size_t embedBufSize = m_embeddingBufferSize; + // Select variant based on variant_latency, or default to current variant + int32_t curTokenCount = embedding.size() / embedBufSize; + if (!variant_latency.empty() && !m_disableKvCache) { + const int32_t cur_variant = _kv_dispatcher->getCurVariant(); + const int32_t new_variant = selectVariantStrategy(curTokenCount, m_nPast, cur_variant); + if (cur_variant != new_variant) // Switch variant if necessary + _kv_update_count = _kv_dispatcher->dispatch(new_variant, m_nPast); + } + + // If variant selected in BERT-Mode, append token history to current request + const int32_t variant = _kv_dispatcher->getCurVariant(); + + // We will never be maintaining history for the embedding + + const int32_t n_inputs = static_cast(curTokenCount); + const int32_t n_past = static_cast(m_nPast); + const int32_t n_valid = n_past + n_inputs; + run_info.n_tokens = variant; + + if (variant != m_ctx_size && m_nPast + variant > m_ctx_size) { + __ERROR("qnn-htp: exceeding ctx_size! 
: {} + {} > {}", m_nPast, variant, m_ctx_size); + return 0; + } + + const int32_t num_iters = 1 + ((n_inputs - 1) / variant); + __DEBUG("qnn-htp: run-inference : {} tokens (AR-{} * {} iters)", + n_inputs, + variant, + num_iters); + + // Validate attention_map size + if (!attention_map.empty() && attention_map.size() != n_inputs && + attention_map.size() != n_inputs * (n_past + n_inputs)) { + // clang-format off + __ERROR("qnn-htp: attention_map must be 1D(n_inputs) or 2D(n_inputs * (n_past + n_inputs))" + "but has size={} for n_past={} n_inputs={}", attention_map.size(), n_past, n_inputs); + // clang-format on + return 0; + } + std::vector chunked_attn_map; + + // Technical note: int32_t can hold upto 596 hours + // Even int16_t should be sufficient here - it holds upto 32.8 seconds + int32_t total_wait = 0; + int32_t total_exec = 0; + + // Reset logit accumulator + size_t output_count = output_all ? n_inputs : 1; // actual number of logits + + output.resize(output_count * m_vocab_size); + + for (int i = 0; i < num_iters; i++) { + const int32_t update_size = std::min(variant, n_inputs - i * variant); + run_info.n_processed = update_size; + const int32_t startIdx = i * variant * embedBufSize; + + int32_t n_skip_prefix = + (i * variant < _offset_to_apply_kv_prefix) ? _size_to_skip_kv_prefix : 0; + int32_t n_apply_prefix_offset = 0; + if (i * variant < _offset_to_apply_kv_prefix) + n_apply_prefix_offset = std::min(variant, _offset_to_apply_kv_prefix - i * variant); + + // Chunk inputs and attention mask + std::span embedding_chunk = std::span{embedding.data(),embedding.size()}.subspan(startIdx, update_size*embedBufSize); + std::span attn_map_chunk = std::span(); + if (attention_map.size() == n_inputs) { + chunked_attn_map.resize(update_size); + // Take exactly update_size elements. Be mindful to decrease offset already processed + for (int j = 0; j < update_size; j++) + chunked_attn_map[j] = attention_map[i * variant + j] - (i * variant); + attn_map_chunk = std::span{chunked_attn_map.data(),chunked_attn_map.size()}; + } else if (attention_map.size() == n_inputs * (n_past + n_inputs)) { + chunked_attn_map.clear(); + chunked_attn_map.resize(update_size * (m_nPast + update_size)); + + for (int j = 0; j < update_size; j++) { + // Be mindful. m_nPast changes each iteration. + // n_tokens is total #tokens called. update_size is the n_tokens for this iteration + // n_past is the initial m_nPast. n_valid = n_past + n_tokens + std::memcpy( + &chunked_attn_map[j * (m_nPast + update_size)], + &attention_map[i * variant * n_valid + j * n_valid], + (m_nPast + update_size) * sizeof(int32_t) + ); + } + attn_map_chunk = std::span{chunked_attn_map.data(),chunked_attn_map.size()}; + } + + if (!setupInputTensors( + embedding_chunk, + (variant == m_ctx_size) ? 
0 : m_nPast, + attn_map_chunk, + n_skip_prefix, + n_apply_prefix_offset + )) + return 0; + + // Run Inference and pipeline KV$ update iff n_inputs is exactly 1 or we have more batches + bool pipeline = (n_inputs == 1 || i < num_iters - 1); + if (!runInferenceHelper(pipeline, &total_wait, &total_exec)) return 0; + + if (output_all) { + // Accumulate logits + const size_t logit_offset = i * variant * m_vocab_size; + const size_t logit_count = update_size * m_vocab_size; + getDequantLogits(std::span{output.data(), output.size()}.subspan(logit_offset, logit_count), + output_all); + } + } + + // Return last logit if not accumulating + if(!output_all) + getDequantLogits(std::span{output.data(), output.size()}, output_all); + + __DEBUG("qnn-htp: run-inference complete : {} usec : wait {} exec {}", + start.elapsed_usec(), + total_wait, + total_exec); + + return output_count; +} + +bool QnnNspModel::cacheEosEmbedding(std::vector& eosEmbedding) { + m_eosEmbedding = eosEmbedding; + return true; +} + +bool QnnNspModel::setKVCacheNPast(size_t n_past, const std::vector& selected) { + __TRACE("setKVCacheNPast (m_nPast={} -> n_past={})", m_nPast, n_past); + if (n_past == m_nPast && n_past != 0) return true; + + if (m_nPast + run_info.n_processed < n_past) { + __ERROR("qnn-htp: set-kv n_past update larger than number of processed tokens : n_past {} n_proc {}", + n_past, + m_nPast + run_info.n_processed); + return false; + } + + if (m_inputType == InputType::TOKENS) { + if (n_past == 0) { + int32_t new_variant = nsp_graph_count.rbegin()->first; + _kv_update_count = _kv_dispatcher->dispatch(new_variant, 0, selected); + token_history.clear(); + + } else if (n_past < m_nPast) { + auto [variant, update_size, tokens] = run_info; + _kv_update_count = _kv_dispatcher->dispatch(variant, n_past); + token_history.resize(n_past); + } else { + int32_t new_variant = nsp_graph_count.begin()->first; + _kv_update_count = _kv_dispatcher->dispatch(new_variant, n_past, selected); + + auto [variant, update_size, tokens] = run_info; + + if (variant == m_ctx_size) { + token_history.assign(&tokens[0], &tokens[n_past]); + } else if (selected.empty()) { + token_history.insert(token_history.end(), &tokens[0], &tokens[n_past - m_nPast]); + } else { + for (auto i = 0; i < tokens.size(); ++i) { + if (selected[i]) token_history.push_back(tokens[i]); + } + } + } + } + else if (m_inputType == InputType::EMBEDDINGS) { // Don't add embedding history, It is costly maintenance to do. 
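+    // Same three cases as the TOKENS path above, minus the history bookkeeping:
+    //   n_past == 0       -> full reset, dispatch on the largest variant
+    //   n_past <  m_nPast -> rewind the cache to n_past on the current variant
+    //   n_past >  m_nPast -> commit newly processed entries (optionally filtered
+    //                        by `selected`) on the smallest variant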
+    if (n_past == 0) {
+      int32_t new_variant = nsp_graph_count.rbegin()->first;
+      _kv_update_count = _kv_dispatcher->dispatch(new_variant, 0, selected);
+    } else if (n_past < m_nPast) {
+      auto [variant, update_size, tokens] = run_info;
+      _kv_update_count = _kv_dispatcher->dispatch(variant, n_past);
+    } else {
+      int32_t new_variant = nsp_graph_count.begin()->first;
+      _kv_update_count = _kv_dispatcher->dispatch(new_variant, n_past, selected);
+    }
+  }
+  else
+  {
+    __ERROR("Unknown input type.");
+    return false;
+  }
+
+  m_nPast = n_past;
+  return true;
+}
+
+template <typename T = int32_t, typename U>
+inline void deQuantizeOutputs(
+    U* inputs,
+    std::span<float>& outputs,
+    const double scale,
+    const int32_t offset,
+    const int count
+) {
+#pragma clang loop vectorize(enable) interleave(enable)
+  for (int i = 0; i < count; ++i)
+    outputs[i] = ((T)inputs[i] + offset) * scale;
+}
+
+// bitWidth is in bytes: 2 -> fp16 inputs, 4 -> fp32 passthrough
+template <typename U>
+inline void castOutputs(U* inputs, std::span<float>& outputs, const int numElements, const int bitWidth) {
+  if (bitWidth == 2) {
+#pragma clang loop vectorize(enable) interleave(enable)
+    for (int i = 0; i < numElements; ++i)
+      outputs[i] = fp16_ieee_to_fp32_value(inputs[i]);
+  }
+  else if (bitWidth == 4) {
+#pragma clang loop vectorize(enable) interleave(enable)
+    for (size_t i = 0; i < numElements; i++) {
+      outputs[i] = inputs[i];
+    }
+  }
+}
+
+size_t QnnNspModel::getDequantLogits(std::span<float> dequant_logits, bool logits_all) {
+  qualla::Timer start;
+
+  QnnUtils::Tensor* const logit_spec =
+      m_nsp_graphs.back().variants[run_info.n_tokens]->getOutput(m_layerNames[LayerType::OUTPUT]);
+  const int return_size = logits_all ? run_info.n_processed : 1;
+  const auto [scale, offset] = logit_spec->quantParam[0];
+
+  auto d_logits = QnnUtils::DataType(logit_spec->tensor);
+
+  int logit_bw = logit_spec->dtype.bw();
+
+  uint8_t* logit_buffer = (uint8_t*)getBuffer(logit_spec);
+  if (logit_spec->dims.getNumElements() == m_vocab_size) {
+    // BERT Mode graph may return only the last logit
+    // If only one logit is returned, simply return the last logit
+    if (return_size > 1)
+      throw std::runtime_error("Requested all logits, but graph only produces one logit");
+  } else {
+    // If multiple logits are returned, offset to the correct location in the buffer
+    if (run_info.n_tokens == m_ctx_size) {
+      // This was left-padded, logits are at [n_tokens - n_processed, n_tokens]
+      logit_buffer += (run_info.n_tokens - return_size) * m_vocab_size * d_logits.bw();
+    } else if (logits_all == false) {
+      // This was right-padded, logits are at indexes [0, n_processed]
+      logit_buffer += (run_info.n_processed - 1) * m_vocab_size * d_logits.bw();
+    }
+  }
+  const int n_logits = static_cast<int>(m_vocab_size * return_size);
+  __TRACE("qnn-htp: get-logits logits_all={} for {} tokens.
Returning {}*{}", + logits_all, + run_info.n_processed, + return_size, + m_vocab_size); + + switch (d_logits) { + case QNN_DATATYPE_UFIXED_POINT_8: + deQuantizeOutputs((uint8_t*)logit_buffer, dequant_logits, scale, offset, n_logits); + break; + case QNN_DATATYPE_UFIXED_POINT_16: + deQuantizeOutputs((uint16_t*)logit_buffer, dequant_logits, scale, offset, n_logits); + break; + case QNN_DATATYPE_FLOAT_16: { + castOutputs((uint16_t*)logit_buffer, dequant_logits, n_logits, logit_bw); + break; + } + default: + __ERROR("Unsupported logits dtype {}", d_logits.str()); + } + + __DEBUG("qnn-htp: getDequantLogits complete : {} usec (return_size={})", + start.elapsed_usec(), + return_size); + return return_size; +} + +bool QnnNspModel::calculate_rope_embeddings(void) { + if (m_positional_encoding.type != PositionalEncoding::ROPE) return true; + + const size_t nmemb = m_ctx_size * m_pos_dim; + const int pos_bw = d_pos.bw(); + + rope_sin = malloc(nmemb * pos_bw); + rope_cos = malloc(nmemb * pos_bw); + + auto [q_scale, q_offset] = t_position_ids_cos->quantParam[0]; + if (d_pos == QNN_DATATYPE_FLOAT_16) { // If floating point, don't quantize! + q_scale = 1.0; + q_offset = 0; + } + + // Calculate inv_freq array + std::vector inv_freq(m_pos_dim); + const double exponent = 1.0 / static_cast(m_pos_dim); + for (int j = 0; j < m_pos_dim; j++) + inv_freq[j] = 1.0 / pow(rope_theta, j * exponent); + double attention_factor = 1.0; + if (rope_scaling.rope_type == RopeScalingParams::ROPE_LLAMA3) { + // Implemented from HuggingFace + // https://github.com/huggingface/transformers/blob/47c29ccfaf56947d845971a439cbe75a764b63d7/src/transformers/modeling_rope_utils.py#L298 + const double& factor = rope_scaling.llama3_params.factor; + const double& low_freq_factor = rope_scaling.llama3_params.low_freq_factor; + const double& high_freq_factor = rope_scaling.llama3_params.high_freq_factor; + const int& old_context_len = rope_scaling.llama3_params.original_max_position_embeddings; + + const double low_freq_wavelen = old_context_len / low_freq_factor; + const double high_freq_wavelen = old_context_len / high_freq_factor; + + for (int j = 0; j < m_pos_dim; j++) { + const double wavelen = 2 * M_PI / inv_freq[j]; + if (wavelen < high_freq_wavelen) // wavelen < high_freq_wavelen: do nothing + continue; + else if (wavelen > low_freq_wavelen) // wavelen > low_freq_wavelen: divide by factor + inv_freq[j] = 1.0 / static_cast(factor * pow(rope_theta, j * exponent)); + else { // otherwise: interpolate between the two, using a smooth factor + assert(low_freq_wavelen != high_freq_wavelen); + const double smooth = + (static_cast(old_context_len) / wavelen - low_freq_factor) / + (high_freq_factor - low_freq_factor); + inv_freq[j] = ((1 - smooth) * inv_freq[j] / factor + smooth * inv_freq[j]); + } + } + } else if (rope_scaling.rope_type == RopeScalingParams::ROPE_LONGROPE) { + // Validate factor >= 1.0, len(long_factor) == rope-dim and len(short_factor) == rope-dim + const double& factor = rope_scaling.longrope_params.factor; + const int& old_context_len = rope_scaling.longrope_params.original_max_position_embeddings; + + const auto& inv_factors = (m_ctx_size > old_context_len) + ? 
rope_scaling.longrope_params.long_factor + : rope_scaling.longrope_params.short_factor; + + if (inv_factors.size() != m_pos_dim) + throw std::runtime_error(fmt::format( + "long-factor (len={}) and short-factor (len={}) must have length rope-dim={}", + rope_scaling.longrope_params.long_factor.size(), + rope_scaling.longrope_params.short_factor.size(), + m_pos_dim + )); + + for (int j = 0; j < m_pos_dim; j++) + inv_freq[j] = inv_freq[j] / inv_factors[j]; + + attention_factor = + std::sqrt(1.0 + std::log(factor) / std::log(static_cast(old_context_len))); + } + for (int i = 0; i < m_ctx_size; i++) { + for (int j = 0; j < m_pos_dim; j++) { + const double freq = i * inv_freq[j]; + + const double sin_val = ((sin(freq) * attention_factor) / q_scale) - q_offset; + const double cos_val = ((cos(freq) * attention_factor) / q_scale) - q_offset; + + // round() instead of floor() seems to produce an acuracy drop. To debug later + switch (d_pos) { + case QNN_DATATYPE_UFIXED_POINT_8: + ((uint8_t*)rope_sin)[i * m_pos_dim + j] = static_cast(sin_val); + ((uint8_t*)rope_cos)[i * m_pos_dim + j] = static_cast(cos_val); + break; + case QNN_DATATYPE_UFIXED_POINT_16: + ((uint16_t*)rope_sin)[i * m_pos_dim + j] = static_cast(sin_val); + ((uint16_t*)rope_cos)[i * m_pos_dim + j] = static_cast(cos_val); + break; + case QNN_DATATYPE_FLOAT_16: + ((uint16_t *)rope_sin)[i * m_pos_dim + j] = fp16_ieee_from_fp32_value(sin_val); + ((uint16_t*)rope_cos)[i * m_pos_dim + j] = fp16_ieee_from_fp32_value(cos_val); + break; + default: + __ERROR("Unsupported position ids datatype {}", d_pos.str()); + return false; + } + } + } + + if (_debug_tensors) { + std::string dtype = + fmt::format("{}", (d_pos == QNN_DATATYPE_FLOAT_16) ? "f" : "u", pos_bw * 8); + std::string fname_sin = fmt::format("{}/position_ids_sin.{}.dat", _debug_path, pos_bw * 8); + std::string fname_cos = fmt::format("{}/position_ids_cos.{}.dat", _debug_path, pos_bw * 8); + QnnUtils::writeRawData(rope_sin, nmemb * pos_bw, fname_sin); + QnnUtils::writeRawData(rope_cos, nmemb * pos_bw, fname_cos); + } + + return true; +} + +bool QnnNspModel::load_lmhead_weight_as_input(void) { + if (!_lmhead_weight_input) return true; + if (_lmhead_weight_input && lmhead_weight_dir.empty()) { + __ERROR("NSPModel: LMhead weight file not found"); + return false; + } + for (auto& variant : m_variant_list) { + for (auto& [tname, tspec] : variant.input_specs) { + if (tname.compare("weight") == 0) { + // weight tensor file name should be in same format as tensor name present in graph + std::string weight_file = + (model_basedir / fs::path(lmhead_weight_dir) / fs::path(tname + ".raw")) + .string(); + + QnnUtils::Dims dims = tspec.dims; + size_t numElements = dims.getNumElements(); + + size_t size = sizeof(float); + std::vector weight_f32; // Temporary variable to load fp32 values + weight_f32.reserve(numElements); + + FILE* fp = fopen(weight_file.c_str(), "r"); + if (fp == NULL) { + __ERROR("NSPModel: Error opening file: {}", weight_file); + return false; + } + + size_t count = fread(weight_f32.data(), size, numElements, fp); + fclose(fp); + + if (count != numElements) { + __ERROR("NSPModel: Could not load {} - expected file size {}", + weight_file, + numElements * size); + return false; + } + + int8_t* weight_buffer = (int8_t*)getBuffer(tspec); + // Quantize the values, per width quantization + QnnUtils::perWidthQuantizeTensorPtr( + weight_f32.data(), + weight_buffer, + tspec.quantParam, + dims.height, + dims.width, + dims.channel + ); + } + } + } + return true; +} + +bool 
QnnNspModel::flushLoraWeightsBuffers(void){ + if(!_lora_enabled){ + __ERROR("qnn-htp: Model does not support LoRA weights."); + return false; + } + + for (auto& variant : m_variant_list) { + for (auto& [tname, tspec] : variant.input_specs) { + if (tname.find("lora") != std::string::npos) { // find lora weights tensors and flush them out + if(getBuffer(tspec) == nullptr) + return false; + size_t numElements = tspec.dims.getNumElements(); + auto offset = tspec.quantParam[0].offset; + // Since values needs to be quantized so zero is going to get translated. + // clang-format off + switch (tspec.dtype) { + case QNN_DATATYPE_UFIXED_POINT_8: std::fill_n((uint8_t*)getBuffer(tspec), numElements, static_cast(-offset)); break; + case QNN_DATATYPE_UFIXED_POINT_16: std::fill_n((uint16_t*)getBuffer(tspec), numElements, static_cast(-offset)); break; + case QNN_DATATYPE_FLOAT_16:{ + uint16_t *buffer = (uint16_t *)getBuffer(tspec); + for(int i=0;i lora_weights_f32; // Temporary variable to load fp32 values + lora_weights_f32.reserve(numElements); + + FILE* fp = fopen(lora_weights_file.c_str(), "r"); + if (fp == NULL) { + __ERROR("NSPModel: Error opening file: {}", lora_weights_file); + return false; + } + + size_t count = fread(lora_weights_f32.data(), size, numElements, fp); + fclose(fp); + + if (count != numElements) { + __ERROR("NSPModel: Could not load {} - expected file size {}", + lora_weights_file, + numElements * size); + return false; + } + + // Quantize the values + // clang-format off + switch (tspec.dtype) { + case QNN_DATATYPE_UFIXED_POINT_8: QnnUtils::quantizeTensorPtr(lora_weights_f32.data(), (uint8_t*)getBuffer(tspec), offset, scale, numElements); break; + case QNN_DATATYPE_UFIXED_POINT_16: QnnUtils::quantizeTensorPtr(lora_weights_f32.data(), (uint16_t*)getBuffer(tspec), offset, scale, numElements); break; + case QNN_DATATYPE_FLOAT_16: float32ToFloat16((uint8_t *)getBuffer(tspec), lora_weights_f32.data(), numElements); break; + default: __ERROR("Unsupported {} datatype for {} tensor", tspec.dtype.str(), tname); return false; + } + } + } + } + return true; +} + +void QnnNspModel::dumpTensorSpecs() { + static const char* stringFmt = + "\t\t{ \"name\": \"%s\", \"dims\": [1, %d, %d, %d], " + "\"bitwidth\": %d, \"dtype\": \"%s\", \"scale\": [%s], \"offset\": [%s] },\n"; + for (GraphVariant& variant : m_variant_list) { + GraphInfo_t* graph_info = variant.graph_info; + + // Create output spec file and open it + std::string filename = fmt::format("{}/spec.{}.json", _debug_path, graph_info->graphName); + + FILE* specFile = fopen(filename.c_str(), "w"); + if (specFile == NULL) throw std::runtime_error("Error opening file : " + filename); + + fprintf(specFile, "{\n\t\"graph_name\" : \"%s\",\n", variant.graph_name.c_str()); + for (bool io : {true, false}) { + uint32_t n_tensors = (io) ? graph_info->numInputTensors : graph_info->numOutputTensors; + Qnn_Tensor_t* tensor = (io) ? graph_info->inputTensors : graph_info->outputTensors; + QnnUtils::TensorMap& tspecs = (io) ? variant.input_specs : variant.output_specs; + + fprintf(specFile, (io) ? 
"\t\"inputs\" : [\n" : "\t\"outputs\" : [\n"); + while (n_tensors-- > 0) { + std::string tname = QnnApi::getTensorName(*tensor); + auto& [_, dims, quant_params, dtype] = tspecs.at(tname); + auto& [__, h, w, c, bw] = dims; + std::string scales; + std::string offsets; + QnnUtils::getQuantParamString(quant_params, scales, offsets); + // clang-format off + fprintf(specFile, stringFmt, tname.c_str(), h, w, c, bw, dtype.str(), scales.c_str(), offsets.c_str()); + // clang-format on + tensor++; + } + fseek(specFile, -2, SEEK_CUR); // Remove trailing comma + fprintf(specFile, "\n\t],\n"); + } + fseek(specFile, -2, SEEK_CUR); // Remove trailing comma + fprintf(specFile, "\n}"); + fclose(specFile); + } +} + +size_t QnnNspModel::loadKVCache(const std::string& load_path) { + + if(m_disableKvCache){ + __ERROR("KV cache is disabled, loading KV cache is not allowed"); + return false; + } + + std::ifstream fs(load_path, std::ios::in | std::ios::binary); + if (fs.fail()) { + // TODO: replace with proper error handling + __ERROR("qnn-htp: load-kv errror reading file {}", load_path); + return 0; + } + + CacheFileSpec spec; + fs.read((char*)&spec, sizeof(spec)); + if (spec.magic != 0xC0DE) { + __ERROR("qnn-htp: load-kv expected 0xC0DE found {:#x}", spec.magic); + return 0; + } + + bool dtype_check = true; + // clang-format off + switch (d_kv) { + case QNN_DATATYPE_UFIXED_POINT_8: dtype_check = spec.dtype == CacheFileSpec::UINT8_T; break; + case QNN_DATATYPE_UFIXED_POINT_16: dtype_check = spec.dtype == CacheFileSpec::UINT16_T; break; + case QNN_DATATYPE_FLOAT_16: dtype_check = spec.dtype == CacheFileSpec::FLOAT16_T; break; + default: __ERROR("Unsupported KV$ datatype {}", d_kv.str()); return false; + } + // clang-format on + + if (!dtype_check) { + __ERROR("Model has KV$ Dtype {} but found {} in cache", d_kv.str(), int(spec.dtype)); + return false; + } + + // clang-format off + __DEBUG("qnn-htp: load-kv {{ num_tensors {}, magic {}, dtype {}, n_heads {}, embed_dim {} update_size {} }}", + spec.num_tensors, spec.magic, int(spec.dtype), spec.n_heads, spec.embed_dim, spec.update_size); + // clang-format on + + const int32_t n_valid = static_cast(spec.update_size); + const int32_t variant = nsp_graph_count.begin()->first; // Set KVManager to smallest variant + _kv_dispatcher->setVariant(variant); + + // Lock, load KeyCache then ValueCache, unlock + for (auto& nsp_graph : m_nsp_graphs) + nsp_graph.waitForLock("loadKVCache", _kv_update_count, false); + for (auto& nsp_graph : m_nsp_graphs) + nsp_graph.kvmanager->loadCache(&fs, true, n_valid, variant, spec.n_heads); + for (auto& nsp_graph : m_nsp_graphs) + nsp_graph.kvmanager->loadCache(&fs, false, n_valid, variant, spec.n_heads); + for (auto& nsp_graph : m_nsp_graphs) + nsp_graph.releaseLock("loadKVCache"); + + fs.seekg(spec.num_tensors * sizeof(double), std::ios::cur); + + + + // Loading previous runs history input only applicable in case of tokens. + // Embeddings history maintenance is costly in terms of memory and time. 
+ if(m_inputType == InputType::TOKENS) { + token_history.clear(); + token_history.resize(n_valid); + fs.read((char *) token_history.data(), n_valid * sizeof(int32_t)); + } + else if(m_inputType == InputType::UNKNOWN) { + __ERROR("Wrong type of input is found."); + return false; + } + fs.close(); + + m_nPast = n_valid; + return spec.update_size; +} + +bool QnnNspModel::saveKVCache(const std::string& save_path) { + + if(m_disableKvCache){ + __ERROR("KV cache is disabled, saving KV cache is not allowed"); + return false; + } + + std::ofstream fs(save_path, std::ios::out | std::ios::binary); + if (fs.fail()) { + __ERROR("qnn-htp: save-kv error opening file : {}", save_path); + throw std::runtime_error("Failed to write to cache file. Please re-check path"); + } + + const uint16_t n_valid = static_cast(m_nPast); + + auto dtype = CacheFileSpec::UINT8_T; + // clang-format off + switch (d_kv) { + case QNN_DATATYPE_UFIXED_POINT_8: dtype = CacheFileSpec::UINT8_T; break; + case QNN_DATATYPE_UFIXED_POINT_16: dtype = CacheFileSpec::UINT16_T; break; + case QNN_DATATYPE_FLOAT_16: dtype = CacheFileSpec::FLOAT16_T; break; + default: __ERROR("Unsupported KV$ datatype {}", d_kv.str()); return false; + } + // clang-format on + + // Pre-calculate #tensors and n_heads to guide memory allocations + uint32_t n_tensors = 0; + int32_t n_heads = 0; + for (auto& nsp_graph : m_nsp_graphs) { + nsp_graph.waitForLock("saveKVCache", _kv_update_count, false); + n_tensors += nsp_graph.kvmanager->getNumKVTensors(); + n_heads = std::max(n_heads, nsp_graph.kvmanager->getMaxNHeads()); + } + + // Save the cache file metadata + CacheFileSpec file_spec( + n_tensors, 0xc0de, dtype, 0x0, static_cast(n_heads), m_kv_dim, n_valid + ); + fs.write((char*)&file_spec, sizeof(file_spec)); + + // Dump KeyCache and ValueCache + for (auto& nsp_graph : m_nsp_graphs) + nsp_graph.kvmanager->dumpCache(&fs, true, n_valid, n_heads); + for (auto& nsp_graph : m_nsp_graphs) + nsp_graph.kvmanager->dumpCache(&fs, false, n_valid, n_heads); + + // Dump Quantization parameters - Key scales then Value scales + for (auto& nsp_graph : m_nsp_graphs) { + std::vector& key_scales = nsp_graph.kvmanager->getKeyScales(); + fs.write((char*)key_scales.data(), key_scales.size() * sizeof(double)); + } + for (auto& nsp_graph : m_nsp_graphs) { + std::vector& value_scales = nsp_graph.kvmanager->getValueScales(); + fs.write((char*)value_scales.data(), value_scales.size() * sizeof(double)); + } + + // Saving previous runs history input only applicable in case of tokens. + // Embeddings history maintenance is costly in terms of memory and time. 
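+  // Mirror of loadKVCache: the token history is appended after the KV$ payload
+  // and the per-tensor scales, so save and load must agree on this file layout.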
+ if(m_inputType == InputType::TOKENS) + fs.write((char*)token_history.data(), n_valid * sizeof(int32_t)); + else if(m_inputType == InputType::UNKNOWN) { + __ERROR("Wrong type of input is found."); + return false; + } + + // Release the lock + for (auto& nsp_graph : m_nsp_graphs) + nsp_graph.releaseLock("saveKVCache"); + + fs.flush(); + fs.close(); + + return true; +} + +bool QnnNspModel::applyBinarySections(std::vector& binsection_list) { + //apply binarysection for lora config + for (int i = 0; i < binsection_list.size(); i++) { + __DEBUG("qnn-htp: applyBinarySections adapters {}", binsection_list.at(i)); + if (!m_qnnApi->applyBinarySection(i, binsection_list.at(i),m_use_mmap,graph_switching)) { + __ERROR("qnn-htp: Error in applyBinarySections {}", i); + return false; + } + } + return true; +} + +bool QnnNspModel::applyLoraStrength(const std::string& alpha_tensor_name, const float alpha_val) { + if(alpha_tensor_name.empty()) return true; + for (auto& variant : m_variant_list) { + if (!variant.input_specs.contains(alpha_tensor_name)) continue; + + auto& tspec = variant.input_specs.at(alpha_tensor_name); + auto [scale, offset] = tspec.quantParam[0]; + + // clang-format off + switch (tspec.dtype) { + case QNN_DATATYPE_UFIXED_POINT_8: QnnUtils::quantizeTensorPtr(&alpha_val, (uint8_t*)getBuffer(tspec), offset, scale, 1); break; + case QNN_DATATYPE_UFIXED_POINT_16: QnnUtils::quantizeTensorPtr(&alpha_val, (uint16_t*)getBuffer(tspec), offset, scale, 1); break; + case QNN_DATATYPE_FLOAT_16: *(uint16_t *)getBuffer(tspec) = fp16_ieee_from_fp32_value(alpha_val); break; + default: __ERROR("Unsupported alpha tensor dtype {}", tspec.dtype.str()); return false; + } + // clang-format on + __DEBUG("qnn-htp: applyAlphaTensor alpha = {}", alpha_val); + return true; // Each lora bin section should have only one alpha tensor + } + return false; +} + +bool QnnNspModel::applyLoraAdapter(const std::string& lora_adapter_name) { + if (lora_conf != LoraConfigType::LORA_ADAPTER_WEIGHT_ENABLE) { + __ERROR("qnn-htp: Lora config is not enable for adapters"); + return false; + } + + if (!lora_config.contains(lora_adapter_name)) { + __ERROR("qnn-htp: Could not find lora adapters config to apply "); + return false; + } + + if (!applyLoraStrength( + lora_config[lora_adapter_name].alpha_tensor_name, + lora_config[lora_adapter_name].alpha_tensor_val + )) { + __ERROR("qnn-htp: Could not apply Alpha tensor "); + return false; + } + + if (!applyBinarySections(lora_config[lora_adapter_name].binsection_list)) { + __ERROR("qnn-htp: Could not apply binary Sections "); + return false; + } + + for (auto& g : m_nsp_graphs) { + for (auto& [n, variant] : g.variants) { + variant->refreshTensorQuantParams(); + } + } + + return true; +} + +size_t QnnNspModel::getEmbeddings(std::span embds) { + qualla::Timer start; + + QnnUtils::Tensor* output_spec = nullptr; + + if(m_pooled_output) + output_spec = m_nsp_graphs.back().variants[run_info.n_tokens]->getOutput(m_layerNames[LayerType::POOL_OUTPUT]); + else + output_spec = m_nsp_graphs.back().variants[run_info.n_tokens]->getOutput(m_layerNames[LayerType::SEQ_OUTPUT]); + + if(output_spec == nullptr) { + __ERROR("encountered null buffer"); + throw std::runtime_error("Model is not supporting per token embedding"); + } + const auto scale = output_spec->quantParam[0].scale; + const auto offset = output_spec->quantParam[0].offset; + + + auto output_datatype = QnnUtils::DataType(output_spec->tensor); + + int output_bw = output_spec->dtype.bw(); + + uint8_t* output_buffer = 
+
+size_t QnnNspModel::getEmbeddings(std::span<float> embds) {
+  qualla::Timer start;
+
+  QnnUtils::Tensor* output_spec = nullptr;
+
+  if (m_pooled_output)
+    output_spec = m_nsp_graphs.back().variants[run_info.n_tokens]->getOutput(m_layerNames[LayerType::POOL_OUTPUT]);
+  else
+    output_spec = m_nsp_graphs.back().variants[run_info.n_tokens]->getOutput(m_layerNames[LayerType::SEQ_OUTPUT]);
+
+  if (output_spec == nullptr) {
+    __ERROR("encountered null buffer");
+    throw std::runtime_error("Model does not support per-token embeddings");
+  }
+  const auto scale = output_spec->quantParam[0].scale;
+  const auto offset = output_spec->quantParam[0].offset;
+
+  auto output_datatype = QnnUtils::DataType(output_spec->tensor);
+
+  int output_bw = output_spec->dtype.bw();
+
+  uint8_t* output_buffer = (uint8_t*)getBuffer(output_spec);
+
+  const int return_size = m_pooled_output ? 1 : run_info.n_processed;
+
+  if (!m_pooled_output) {
+    // If multiple token embeddings are returned, offset to the correct location in the buffer
+    if (run_info.n_tokens == m_ctx_size) {
+      // This was left-padded, token embeddings are at [n_tokens - n_processed, n_tokens]
+      output_buffer += (run_info.n_tokens - return_size) * m_embd_size * output_bw;
+    } else {
+      // This was right-padded, token embeddings are at indexes [0, n_processed]
+      output_buffer += (run_info.n_processed - 1) * m_embd_size * output_bw;
+    }
+  }
+
+  const int output_len = static_cast<int>(return_size * m_embd_size);
+  __TRACE("qnn-htp: get-embds for {} tokens. scale = {}, offset = {}, Returning {}",
+          run_info.n_processed,
+          scale,
+          offset,
+          output_len);
+
+  switch (output_datatype) {
+    case QNN_DATATYPE_UFIXED_POINT_8:
+      deQuantizeOutputs((uint8_t*)output_buffer, embds, scale, offset, output_len);
+      break;
+    case QNN_DATATYPE_UFIXED_POINT_16:
+      deQuantizeOutputs((uint16_t*)output_buffer, embds, scale, offset, output_len);
+      break;
+    case QNN_DATATYPE_FLOAT_16:
+      castOutputs((uint16_t*)output_buffer, embds, output_len, output_bw);
+      break;
+    case QNN_DATATYPE_FLOAT_32:
+      castOutputs((float*)output_buffer, embds, output_len, output_bw);
+      break;
+    default:
+      __ERROR("Unsupported output datatype");
+  }
+
+  __DEBUG("qnn-htp: getEmbeddings complete : {} usec (return_size={})",
+          start.elapsed_usec(),
+          output_len);
+  return output_len;
+}
+
+// Utility functions to convert structs from/to json for parsing/dumping
+void from_json(const json& j, RopeScalingParams& p) {
+  p.rope_type = Config::optional(j, "rope-type", RopeScalingParams::DEFAULT);
+  if (p.rope_type == RopeScalingParams::ROPE_LLAMA3) {
+    try {
+      j.at("factor").get_to(p.llama3_params.factor);
+      j.at("low-freq-factor").get_to(p.llama3_params.low_freq_factor);
+      j.at("high-freq-factor").get_to(p.llama3_params.high_freq_factor);
+      j.at("original-max-position-embeddings")
+          .get_to(p.llama3_params.original_max_position_embeddings);
+    } catch (const json::exception& e) {
+      // clang-format off
+      throw std::runtime_error(fmt::format("Parsing error for llama3 rope scaling - {}\n"
+          "llama3 requires keys ['original-max-position-embeddings', 'factor', 'low-freq-factor', 'high-freq-factor'].\n"
+          "Found config - {}", e.what(), j.dump()));
+      // clang-format on
+    }
+  } else if (p.rope_type == RopeScalingParams::ROPE_LONGROPE) {
+    try {
+      j.at("original-max-position-embeddings")
+          .get_to(p.longrope_params.original_max_position_embeddings);
+      j.at("long-factor").get_to(p.longrope_params.long_factor);
+      j.at("short-factor").get_to(p.longrope_params.short_factor);
+      if (j.contains("factor"))
+        j.at("factor").get_to(p.longrope_params.factor);
+      else
+        p.longrope_params.factor = j.at("max-position-embeddings").get<double>() /
+                                   p.longrope_params.original_max_position_embeddings;
+    } catch (const json::exception& e) {
+      // clang-format off
+      throw std::runtime_error(fmt::format("Parsing error for longrope scaling - {}\n"
+          "LongRope requires keys ['original-max-position-embeddings', 'factor' or 'max-position-embeddings', 'long-factor', 'short-factor'].\n"
+          "Found config - {}", e.what(), j.dump()));
+      // clang-format on
+    }
+  }
+}
+
+void to_json(json& j, const RopeScalingParams& p) {
+  j["rope-type"] = p.rope_type;
+  if (p.rope_type == RopeScalingParams::ROPE_LLAMA3) {
+    j["factor"] = p.llama3_params.factor;
+    j["low-freq-factor"] = p.llama3_params.low_freq_factor;
+    j["high-freq-factor"] = p.llama3_params.high_freq_factor;
+    j["original-max-position-embeddings"] = p.llama3_params.original_max_position_embeddings;
+  } else if (p.rope_type == RopeScalingParams::ROPE_LONGROPE) {
+    j["factor"] = p.longrope_params.factor;
+    j["long-factor"] = p.longrope_params.long_factor;
+    j["short-factor"] = p.longrope_params.short_factor;
+    j["original-max-position-embeddings"] = p.longrope_params.original_max_position_embeddings;
+  }
+}
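// -----------------------------------------------------------------------------
// [Editorial sketch - not part of the original patch] Example "rope-scaling"
// JSON accepted by from_json() above; the keys match the parser's error
// messages, the numeric values are illustrative only:
//
//   {"rope-type": "llama3", "factor": 8.0, "low-freq-factor": 1.0,
//    "high-freq-factor": 4.0, "original-max-position-embeddings": 8192}
//
//   {"rope-type": "longrope", "original-max-position-embeddings": 4096,
//    "max-position-embeddings": 131072,   // used only when "factor" is absent
//    "long-factor": [1.0, 1.1], "short-factor": [1.0, 1.0]}
// -----------------------------------------------------------------------------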
+
+void from_json(const json& j, PositionalEncoding& p) {
+  p.type = Config::optional(j, "type", PositionalEncoding::ROPE);
+  if (p.type == PositionalEncoding::ROPE) {
+    p.rope_params.dims = Config::mandatory<int32_t>(j, "rope-dim");
+    p.rope_params.theta = Config::optional(j, "rope-theta", 10000);
+    p.rope_params.rope_scaling = Config::optional<RopeScalingParams>(j, "rope-scaling", {});
+  }
+}
+
+void to_json(json& j, const PositionalEncoding& p) {
+  j["type"] = p.type;
+  if (p.type == PositionalEncoding::ROPE) {
+    j["rope-dim"] = p.rope_params.dims;
+    j["rope-theta"] = p.rope_params.theta;
+    j["rope-scaling"] = p.rope_params.rope_scaling;
+  }
+}
+
+} // namespace qualla
diff --git a/Genie/Genie/src/qualla/engines/qnn-htp/nsp-model.hpp b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-model.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..67eee738ed09a690f14ad73a6921499314040415
--- /dev/null
+++ b/Genie/Genie/src/qualla/engines/qnn-htp/nsp-model.hpp
@@ -0,0 +1,424 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#ifndef __QUALLA_NSP_MODEL_H_
+#define __QUALLA_NSP_MODEL_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#include "qualla/env.hpp"
+#include "qualla/detail/threadpool.hpp"
+
+#include "QnnApi.hpp"
+#include "IOTensor.hpp"
+
+#include "nsp-kvdispatcher.hpp"
+#include "qnn-utils.hpp"
+#include "nsp-graph.hpp"
+
+namespace qualla {
+
+enum ModelArchitectureType : uint8_t {
+  DECODER = 0,
+  ENCODER = 1
+};
+
+enum LoraConfigType : uint8_t {
+  LORA_DISABLE = 0,
+  LORA_INPUT_WEIGHT_ENABLE = 1,
+  LORA_ADAPTER_WEIGHT_ENABLE = 2
+};
+
+static const std::unordered_set<Qnn_DataType_t> supported_activations = {
+  QNN_DATATYPE_UFIXED_POINT_8,
+  QNN_DATATYPE_UFIXED_POINT_16,
+  QNN_DATATYPE_INT_32,
+  QNN_DATATYPE_FLOAT_16
+};
+
+struct RopeScalingParams {
+  enum RopeType { DEFAULT, ROPE_LLAMA3, ROPE_LONGROPE } rope_type = DEFAULT;
+
+  // This should be a union, but running into compilation issues with non-trivial dtor/copy-ctor
+  struct {
+    double factor;
+    double low_freq_factor;
+    double high_freq_factor;
+    int original_max_position_embeddings;
+  } llama3_params;
+
+  struct {
+    double factor;
+    std::vector<double> long_factor;
+    std::vector<double> short_factor;
+    int original_max_position_embeddings;
+  } longrope_params;
+
+  RopeScalingParams() {}
+};
+
+NLOHMANN_JSON_SERIALIZE_ENUM(
+    RopeScalingParams::RopeType,
+    {{RopeScalingParams::DEFAULT, "default"},
+     {RopeScalingParams::ROPE_LLAMA3, "llama3"},
+     {RopeScalingParams::ROPE_LONGROPE, "longrope"}}
+)
+
+struct PositionalEncoding {
+  enum EncodingType : uint8_t { ROPE = 0x0, ABSOLUTE = 0x1, ALIBI = 0x2, UNDEFINED = 0xff } type;
+  struct {
+    int32_t dims;
+    double theta;
+    RopeScalingParams rope_scaling;
+  } rope_params;
+
+  PositionalEncoding() { type = ROPE; }
+};
+
+NLOHMANN_JSON_SERIALIZE_ENUM(
+    PositionalEncoding::EncodingType,
+    {{PositionalEncoding::UNDEFINED, "undefined"},
+     {PositionalEncoding::ROPE, "rope"},
+     {PositionalEncoding::ABSOLUTE, "absolute"},
+     {PositionalEncoding::ALIBI, "alibi"}}
+)
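// [Editorial note - not part of the original patch] NLOHMANN_JSON_SERIALIZE_ENUM
// generates the to_json/from_json pair for the enum, e.g. "rope" <-> ROPE.
// nlohmann::json maps an unrecognized string to the FIRST entry in the list,
// which is presumably why UNDEFINED is listed first here (unknown encoding
// strings parse as UNDEFINED), while an unknown rope-type falls back to DEFAULT.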
+
+void from_json(const json& j, PositionalEncoding& p);
+void to_json(json& j, const PositionalEncoding& p);
+void from_json(const json& j, RopeScalingParams& p);
+void to_json(json& j, const RopeScalingParams& p);
+
+class QnnNspModel {
+ protected:
+  Env& _env;
+
+  // Populated by allocateTensors()
+  // Maps tensor name to allocation block index and block offset
+  std::map> tensor_alloc_info;
+  bool float32ToFloat16(uint8_t* out, float* in, size_t numElements);
+
+  int32_t input_width = 1;
+  int32_t input_channel = 1;
+  int32_t input_bitWidth = 4;
+
+  int32_t embedding_length = -1;
+  std::string embedding_datatype{"float32"};
+
+  // Maps layers to their tensor names.
+  std::map<LayerType, std::string> m_layerNames {
+    {LayerType::INPUT, "input_ids"},
+    {LayerType::OUTPUT, "logits"},
+    {LayerType::TOKEN_TYPE_IDS, "token_type_ids"},
+    {LayerType::POOL_OUTPUT, "pooled_output"},
+    {LayerType::SEQ_OUTPUT, "sequence_output"},
+    {LayerType::ATTN_MASK, "attention_mask"},
+    {LayerType::POS_SIN, "position_ids_sin"},
+    {LayerType::POS_COS, "position_ids_cos"},
+    {LayerType::POS_IDS, "position_ids"}
+  };
+
+  std::vector m_eosEmbedding;
+ public:
+  struct LoraConfig {
+    std::string lora_name;
+    std::vector<std::string> binsection_list; // lorav2 adapter bin filenames
+    std::string path;                         // lorav1 weights directory
+    std::string alpha_tensor_name;            // lorav2 alpha tensor name
+    float alpha_tensor_val;                   // lorav2 alpha tensor value
+  };
+  struct Params {
+    ModelArchitectureType modelArchitectureType; // Model architecture
+    std::filesystem::path model_basedir;         // model basedir
+    std::vector<std::string> model_list;         // model filenames
+    std::map variant_latency;                    // latency for different variants
+    std::vector<std::string> exec_select_graphs; // Execute selected graphs
+    bool load_select_graphs; // Load only the graphs mentioned in exec_select_graphs from the context bin; by default all graphs are loaded
+
+    bool use_mmap;
+    bool use_async_Init;
+    uint64_t mmap_budget;
+    int64_t spill_fill_bufsize;
+    int32_t ctx_size;
+    int32_t kv_dim;
+    int32_t pad_token;
+    size_t n_embd;
+    uint32_t n_threads{0};
+    uint64_t cpumask{0};
+    bool poll{false};
+    std::string backend_lib;
+    std::string backend_ext_conf;
+    std::string debug_path;
+    bool debug_specs;
+    bool debug_tensors;
+    bool debug_outputs;
+    bool debug_qnn;
+    std::string kv_update_method;
+    std::string lmhead_weight_dir;
+    bool graph_switching;
+    LoraConfigType lora_config_type;
+    std::map<std::string, LoraConfig> lora_param;
+    std::string input_layer_name;
+    int32_t embedding_length;
+    std::string embedding_datatype;
+    bool pooled_output;
+    bool disable_kv_cache;
+    // Parameters for positional encodings
+    PositionalEncoding positional_encoding_params;
+  };
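// ---------------------------------------------------------------------------
// [Editorial sketch - not part of the original patch] Minimal population of
// the Params struct above before constructing the model; every value below is
// illustrative (paths and filenames are hypothetical):
//
//   QnnNspModel::Params params{};
//   params.modelArchitectureType = DECODER;
//   params.model_basedir = "/path/to/model";   // hypothetical directory
//   params.model_list    = {"split_1.bin"};    // hypothetical context bin
//   params.ctx_size      = 4096;
//   params.n_embd        = 4096;
//   params.backend_lib   = "libQnnHtp.so";
//   QnnNspModel model(env, params);
// ---------------------------------------------------------------------------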
+
+  const std::filesystem::path model_basedir;
+  std::vector<std::string> model_filelist;
+  std::string lmhead_weight_dir;
+  std::vector<int32_t> token_history;
+  std::map variant_latency;
+  std::vector<std::string> exec_select_graphs;
+  bool load_select_graphs;
+
+  InputType m_inputType{InputType::UNKNOWN};
+
+  LoraConfigType lora_conf;
+  std::map<std::string, LoraConfig> lora_config;
+  // QNN specific variables
+  const bool m_sharedBuffer{true};
+  std::unique_ptr<QnnApi> m_qnnApi;
+  std::unique_ptr<IOTensor> m_ioTensor{nullptr};
+  int64_t spill_fill_buffer_size;
+  bool m_use_mmap{false};
+  bool m_use_async_Init{true};
+  uint64_t mmap_budget;
+  bool graph_switching{false};
+  size_t n_embd;
+
+  bool m_pooled_output{true};
+  bool m_disableKvCache{false};
+  // Model parameters
+  ModelArchitectureType m_modelArchitectureType;
+  int32_t m_ctx_size{-1};
+  int32_t m_vocab_size{-1};
+  int32_t m_kv_dim{-1};
+  int32_t m_embd_size{-1};
+  int32_t m_pad_token{-1};
+
+  size_t m_embeddingBufferSize{0};
+
+  QnnUtils::DataType d_input{QNN_DATATYPE_INT_32}, d_kv{QNN_DATATYPE_UFIXED_POINT_8},
+      d_attn_map{QNN_DATATYPE_UFIXED_POINT_16}, d_token_type{QNN_DATATYPE_INT_32};
+
+  // int32_t attention_mask_bitwidth{2}, position_id_bitwidth{2};
+
+  // Information regarding model execution settings and last inference
+  struct RunInfo {
+    int32_t n_tokens;
+    size_t n_processed;
+
+    std::vector<int32_t> tokens;
+  } run_info{-1, 0, {}};
+
+  // Model specific variables
+  uint32_t m_num_graphs;
+  bool _lora_enabled{false};
+  bool _lmhead_weight_input{false};
+
+  // QnnNspGraph contains all GraphVariants for a specific split (with index=split_idx)
+  std::vector<QnnNspGraph> m_nsp_graphs;
+  // GraphVariant represents one input size within one split (e.g. KV$_split_1)
+  std::vector<GraphVariant> m_variant_list;
+
+  // For ease of usage: Map from graph name to the corresponding GraphVariant
+  std::unordered_map m_graph_map;
+  // This map records how many graphs have been loaded for a particular input size
+  std::map nsp_graph_count;
+
+  bool _threaded{false};
+  uint64_t _cpumask{0};
+  ThreadPool threadpool;
+
+  KVManagerMode _kv_update_method{POINTER_SHIFT};
+
+  int32_t _kv_update_count{0};
+  std::unique_ptr _kv_dispatcher;
+
+  std::string _backend_lib;
+  std::string _backend_ext_conf;
+
+  // Store some pointers for easier access
+  QnnUtils::Tensor* t_input_ids{nullptr};
+  QnnUtils::Tensor* t_attn_mask{nullptr};
+  QnnUtils::Tensor* t_token_type_ids{nullptr};
+
+  // Variables for positional encodings
+  PositionalEncoding m_positional_encoding;
+  QnnUtils::DataType d_pos{QNN_DATATYPE_UFIXED_POINT_16};
+  // PositionalEncodingType::ABSOLUTE OR PositionalEncodingType::ALIBI
+  QnnUtils::Tensor* t_position_ids{nullptr};
+  // PositionalEncodingType::ROPE variables
+  int32_t m_pos_dim{-1};          // Dimension of positional embedding tensor (incl partial_factor)
+  double rope_theta{10000.0};     // Base theta parameter for RoPE calculations
+  void* rope_sin{nullptr};        // Pre-calculated RoPE sin table of size [ctx_size, m_pos_dim]
+  void* rope_cos{nullptr};        // Pre-calculated RoPE cos table of size [ctx_size, m_pos_dim]
+  RopeScalingParams rope_scaling; // RoPE scaling parameters
+
+  QnnUtils::Tensor* t_position_ids_sin{nullptr};
+  QnnUtils::Tensor* t_position_ids_cos{nullptr};
+
+  // n_past tracks how many positions of the KV cache are populated
+  size_t m_nPast{0};
+
+  // Self-Speculative Decoding
+  // This prefix is not for input tokens, but just for special tokens
+  // Only the special tokens from the offset should attend to the KV prefix
+  int32_t _size_to_skip_kv_prefix{0};
+  int32_t _offset_to_apply_kv_prefix{0};
+
+  // Keep track of inference count
+  int m_inference_count = 0;
+
+  // Debug mode settings
+  bool _debug_specs{false};
+  bool _debug_tensors{false};
+  bool _debug_outputs{false};
+  bool _debug_qnn{false};
+  std::string _debug_path;
+
+  QnnNspModel(Env& env, const Params& params);
+
+  ~QnnNspModel();
+
+  bool initializeModel(void);
+  bool validateModel(void);
+  bool initializeIOTensors(void);
+  bool initializeTensorPointers();
+  bool initializeKVManager();
+  bool calculate_rope_embeddings(void);
+  bool load_lmhead_weight_as_input(void);
+  bool flushLoraWeightsBuffers(void);
+
+  template <typename T>
+  bool setupAttentionMask(
+      bool pad_left,
+      int n_tokens,
+      int n_inputs,
+      int n_past,
+      std::span attention_map,
+      size_t n_skip_prefix,
+      size_t n_apply_prefix_offset
+  );
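// [Editorial note - not part of the original patch] T in the templated setup
// helpers appears to be the quantized activation type of the corresponding
// input tensor, mirroring the dtype switch in applyLoraStrength(): e.g.
// setupAttentionMask<uint8_t> for QNN_DATATYPE_UFIXED_POINT_8 and
// setupAttentionMask<uint16_t> for QNN_DATATYPE_UFIXED_POINT_16, while the
// dedicated *FP16 overloads below cover QNN_DATATYPE_FLOAT_16.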
+
+  bool setupAttentionMaskFP16(
+      bool pad_left,
+      int n_tokens,
+      int n_inputs,
+      int n_past,
+      std::span attention_map,
+      size_t n_skip_prefix,
+      size_t n_apply_prefix_offset);
+
+  bool setupRopePositionEmbeddingFP16(
+      bool pad_left,
+      int n_tokens,
+      int n_inputs,
+      int n_past,
+      std::span attention_map,
+      size_t n_skip_prefix,
+      size_t n_apply_prefix_offset
+  );
+
+  template <typename T>
+  bool setupRopePositionEmbedding(
+      bool pad_left,
+      int n_tokens,
+      int n_inputs,
+      int n_past,
+      std::span attention_map,
+      size_t n_skip_prefix,
+      size_t n_apply_prefix_offset
+  );
+
+  template <typename T>
+  bool setupAlibiPositionEmbedding(
+      bool pad_left,
+      int n_tokens,
+      int n_inputs,
+      int n_past
+  );
+
+  bool setupInputTensors(
+      std::span<const int32_t> tokens,
+      int32_t n_past,
+      std::span attention_map,
+      size_t n_skip_prefix,
+      size_t n_apply_prefix_offset
+  );
+
+  bool setupInputTensors(
+      std::span<const float> embedding,
+      int32_t n_past,
+      std::span attention_map,
+      size_t n_skip_prefix,
+      size_t n_apply_prefix_offset
+  );
+
+  bool quantizeInput(float* in, size_t tensorOffset, size_t length);
+
+  size_t getEmbeddingBufferSize();
+
+  size_t runInference(
+      const std::vector<int32_t>& tokens,
+      const std::vector& attention_map,
+      std::vector<float>& output,
+      bool output_all = false
+  );
+
+  size_t runInference(
+      std::vector<float>& embeddings,
+      const std::vector& attention_map,
+      std::vector<float>& output,
+      bool output_all = false
+  );
+
+  bool cacheEosEmbedding(std::vector& eosEmbedding);
+
+  bool setKVCacheNPast(size_t n_past, const std::vector& selected);
+
+  size_t getEmbeddings(std::span<float> embds);
+
+  size_t getDequantLogits(std::span<float> logits, bool logits_all = false);
+
+  bool debugOutputs(QnnUtils::Tensor* outTensor, std::string& outTensorName);
+
+  size_t loadKVCache(const std::string& load_path);
+  bool saveKVCache(const std::string& save_path);
+  bool applyLoraStrength(const std::string& alpha_tensor_name, const float alpha_val);
+  bool applyLoraAdapter(const std::string& lora_adapter_name);
+  bool applyBinarySections(std::vector<std::string>& binsection_list);
+  bool applyLoraWeights(const std::string& lora_weights_name);
+
+ protected:
+  // Internal functions to separate different runInference logic
+  int32_t selectVariantStrategy(int32_t n_inputs, int32_t n_past, int32_t cur_variant);
+  bool runInferenceHelper(bool pipeline, int32_t* total_wait, int32_t* total_exec);
+
+  inline bool updateTensorPointer(GraphVariant& variant, std::string& key, QnnUtils::Tensor*& t);
+  inline void* getBuffer(QnnUtils::Tensor& spec) { return m_ioTensor->getBuffer(spec.tensor); }
+  inline void* getBuffer(QnnUtils::Tensor* spec) { return m_ioTensor->getBuffer(spec->tensor); }
+  inline size_t getBufferSize(QnnUtils::Tensor& spec) { return spec.dims.getSize(); }
+  inline size_t getBufferSize(QnnUtils::Tensor* spec) { return spec->dims.getSize(); }
+
+  void dumpTensorSpecs();
+};
+
+} // namespace qualla
+
+#endif
diff --git a/Genie/Genie/src/qualla/env.cpp b/Genie/Genie/src/qualla/env.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef8bf21d23df24df620df2646cd00d22081abc95
--- /dev/null
+++ b/Genie/Genie/src/qualla/env.cpp
@@ -0,0 +1,51 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#include
+
+#include
+
+namespace fs = std::filesystem;
+
+namespace qualla {
+
+Env::Env(const json& conf) {
+  _path.models = fs::path();
+  _path.cache = fs::path();
+
+  if (conf.contains("path")) {
+    const json& p = conf["path"];
+
+    if (p.contains("models"))
+      _path.models = fs::path(p["models"].get<std::string>()).make_preferred();
+    if (p.contains("cache"))
+      _path.cache = fs::path(p["cache"].get<std::string>()).make_preferred();
+  }
+
+  using qc = qualla::Config;
+
+  // Create logger
+  const qualla::json& log_conf = qc::optional<qualla::json>(conf, "log", {});
+  _logger = Logger::create(log_conf);
+}
+
+Env::~Env() {}
+
+std::shared_ptr<Env> Env::create(const qualla::json& conf) {
+  return std::make_shared<Env>(conf);
+}
+
+std::shared_ptr<Env> Env::create(std::istream& json_stream) {
+  return create(json::parse(json_stream));
+}
+
+std::shared_ptr<Env> Env::create(const std::string& json_str) {
+  return create(json::parse(json_str));
+}
+
+} // namespace qualla
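// ---------------------------------------------------------------------------
// [Editorial sketch - not part of the original patch] Example of the JSON
// shape Env() above consumes; both keys are optional, the paths are
// hypothetical, and the "log" object is whatever Logger::create() accepts:
//
//   {
//     "path": { "models": "/data/models", "cache": "/data/cache" },
//     "log":  { }
//   }
//
//   auto env = qualla::Env::create(R"({"path":{"models":"/data/models"}})");
// ---------------------------------------------------------------------------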
diff --git a/Genie/Genie/src/qualla/gpio-marker.cpp b/Genie/Genie/src/qualla/gpio-marker.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..63f52241f9b5df22257a7a926b851c4dced76b2d
--- /dev/null
+++ b/Genie/Genie/src/qualla/gpio-marker.cpp
@@ -0,0 +1,66 @@
+//==============================================================================
+//
+// Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+// All Rights Reserved.
+// Confidential and Proprietary - Qualcomm Technologies, Inc.
+//
+//==============================================================================
+
+#include "qualla/detail/gpio-marker.hpp"
+#include "fmt/format.h"
+
+namespace fs = std::filesystem;
+
+namespace qualla {
+
+GpioMarker::GpioMarker(const json& conf) {
+  // Parse config
+  using qc = qualla::Config;
+
+  _tool_path = qc::optional(conf, "tool-path", "");
+  _command = qc::optional(conf, "command", "");
+  _gpio_num = qc::optional(conf, "gpio-num", -1);
+
+  // The marker is enabled only when a valid tool path is configured
+  _gpio_marker_enable = !_tool_path.empty() && fs::exists(_tool_path);
+  if (_gpio_marker_enable) reset();
+}
+
+GpioMarker::~GpioMarker() {}
+
+void GpioMarker::set() {
+  if (!_gpio_marker_enable) return;
+
+  // Toggle the marker pin by shelling out to the configured tool
+  _gpio_status = !_gpio_status;
+  std::string cmd = fmt::format("{} {} {}={}", _tool_path, _command, _gpio_num, _gpio_status);
+  system(cmd.c_str());
+}
+
+void GpioMarker::reset() {
+  if (!_gpio_marker_enable) return;
+
+  std::string cmd = fmt::format("{} {} {}=0", _tool_path, _command, _gpio_num);
+  system(cmd.c_str());
+  _gpio_status = 0;
+}
+
+std::unique_ptr<GpioMarker> GpioMarker::create(const qualla::json& conf) {
+  return std::make_unique<GpioMarker>(conf);
+}
+
+std::unique_ptr<GpioMarker> GpioMarker::create(std::istream& json_stream) {
+  return create(json::parse(json_stream));
+}
+
+std::unique_ptr<GpioMarker> GpioMarker::create(const std::string& json_str) {
+  return create(json::parse(json_str));
+}
+
+} // namespace qualla
diff --git a/Genie/Genie/src/qualla/include/fmt/core.h b/Genie/Genie/src/qualla/include/fmt/core.h
new file mode 100644
index 0000000000000000000000000000000000000000..f9e3b7d6dc1632c0596194f22218c976deedea54
--- /dev/null
+++ b/Genie/Genie/src/qualla/include/fmt/core.h
@@ -0,0 +1,2922 @@
+// Formatting library for C++ - the core API for char/UTF-8
+//
+// Copyright (c) 2012 - present, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
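// [Editorial note - not part of the original patch] This is a vendored copy of
// the {fmt} core header; FMT_VERSION 100101 below corresponds to upstream
// fmt 10.1.1. It backs the fmt::format() calls in gpio-marker.cpp and the
// "{}"-style placeholders used by the engine's logging macros, e.g.
// fmt::format("qnn-htp: get-embds for {} tokens", n).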
+ +#ifndef FMT_CORE_H_ +#define FMT_CORE_H_ + +#include // std::byte +#include // std::FILE +#include // std::strlen +#include +#include +#include // std::addressof +#include +#include + +// The fmt library version in the form major * 10000 + minor * 100 + patch. +#define FMT_VERSION 100101 + +#if defined(__clang__) && !defined(__ibmxl__) +# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) +#else +# define FMT_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \ + !defined(__NVCOMPILER) +# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define FMT_GCC_VERSION 0 +#endif + +#ifndef FMT_GCC_PRAGMA +// Workaround _Pragma bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59884. +# if FMT_GCC_VERSION >= 504 +# define FMT_GCC_PRAGMA(arg) _Pragma(arg) +# else +# define FMT_GCC_PRAGMA(arg) +# endif +#endif + +#ifdef __ICL +# define FMT_ICC_VERSION __ICL +#elif defined(__INTEL_COMPILER) +# define FMT_ICC_VERSION __INTEL_COMPILER +#else +# define FMT_ICC_VERSION 0 +#endif + +#ifdef _MSC_VER +# define FMT_MSC_VERSION _MSC_VER +# define FMT_MSC_WARNING(...) __pragma(warning(__VA_ARGS__)) +#else +# define FMT_MSC_VERSION 0 +# define FMT_MSC_WARNING(...) +#endif + +#ifdef _MSVC_LANG +# define FMT_CPLUSPLUS _MSVC_LANG +#else +# define FMT_CPLUSPLUS __cplusplus +#endif + +#ifdef __has_feature +# define FMT_HAS_FEATURE(x) __has_feature(x) +#else +# define FMT_HAS_FEATURE(x) 0 +#endif + +#if defined(__has_include) || FMT_ICC_VERSION >= 1600 || FMT_MSC_VERSION > 1900 +# define FMT_HAS_INCLUDE(x) __has_include(x) +#else +# define FMT_HAS_INCLUDE(x) 0 +#endif + +#ifdef __has_cpp_attribute +# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define FMT_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +// Check if relaxed C++14 constexpr is supported. +// GCC doesn't allow throw in constexpr until version 6 (bug 67371). +#ifndef FMT_USE_CONSTEXPR +# if (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 || \ + (FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L)) && \ + !FMT_ICC_VERSION && (!defined(__NVCC__) || FMT_CPLUSPLUS >= 202002L) +# define FMT_USE_CONSTEXPR 1 +# else +# define FMT_USE_CONSTEXPR 0 +# endif +#endif +#if FMT_USE_CONSTEXPR +# define FMT_CONSTEXPR constexpr +#else +# define FMT_CONSTEXPR +#endif + +#if ((FMT_CPLUSPLUS >= 202002L) && \ + (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \ + (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002) +# define FMT_CONSTEXPR20 constexpr +#else +# define FMT_CONSTEXPR20 +#endif + +// Check if constexpr std::char_traits<>::{compare,length} are supported. +#if defined(__GLIBCXX__) +# if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \ + _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +# endif +#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \ + _LIBCPP_VERSION >= 4000 +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +#endif +#ifndef FMT_CONSTEXPR_CHAR_TRAITS +# define FMT_CONSTEXPR_CHAR_TRAITS +#endif + +// Check if exceptions are disabled. 
+#ifndef FMT_EXCEPTIONS +# if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ + (FMT_MSC_VERSION && !_HAS_EXCEPTIONS) +# define FMT_EXCEPTIONS 0 +# else +# define FMT_EXCEPTIONS 1 +# endif +#endif + +// Disable [[noreturn]] on MSVC/NVCC because of bogus unreachable code warnings. +#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VERSION && \ + !defined(__NVCC__) +# define FMT_NORETURN [[noreturn]] +#else +# define FMT_NORETURN +#endif + +#ifndef FMT_NODISCARD +# if FMT_HAS_CPP17_ATTRIBUTE(nodiscard) +# define FMT_NODISCARD [[nodiscard]] +# else +# define FMT_NODISCARD +# endif +#endif + +#ifndef FMT_INLINE +# if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_INLINE inline __attribute__((always_inline)) +# else +# define FMT_INLINE inline +# endif +#endif + +#ifdef _MSC_VER +# define FMT_UNCHECKED_ITERATOR(It) \ + using _Unchecked_type = It // Mark iterator as checked. +#else +# define FMT_UNCHECKED_ITERATOR(It) using unchecked_type = It +#endif + +#ifndef FMT_BEGIN_NAMESPACE +# define FMT_BEGIN_NAMESPACE \ + namespace fmt { \ + inline namespace v10 { +# define FMT_END_NAMESPACE \ + } \ + } +#endif + +#ifndef FMT_EXPORT +# define FMT_EXPORT +# define FMT_BEGIN_EXPORT +# define FMT_END_EXPORT +#endif + +#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) +# ifdef FMT_LIB_EXPORT +# define FMT_API __declspec(dllexport) +# elif defined(FMT_SHARED) +# define FMT_API __declspec(dllimport) +# endif +#else +# if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# if defined(__GNUC__) || defined(__clang__) +# define FMT_API __attribute__((visibility("default"))) +# endif +# endif +#endif +#ifndef FMT_API +# define FMT_API +#endif + +// libc++ supports string_view in pre-c++17. +#if FMT_HAS_INCLUDE() && \ + (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) +# include +# define FMT_USE_STRING_VIEW +#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L +# include +# define FMT_USE_EXPERIMENTAL_STRING_VIEW +#endif + +#ifndef FMT_UNICODE +# define FMT_UNICODE !FMT_MSC_VERSION +#endif + +#ifndef FMT_CONSTEVAL +# if ((FMT_GCC_VERSION >= 1000 || FMT_CLANG_VERSION >= 1101) && \ + (!defined(__apple_build_version__) || \ + __apple_build_version__ >= 14000029L) && \ + FMT_CPLUSPLUS >= 202002L) || \ + (defined(__cpp_consteval) && \ + (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704)) +// consteval is broken in MSVC before VS2022 and Apple clang before 14. +# define FMT_CONSTEVAL consteval +# define FMT_HAS_CONSTEVAL +# else +# define FMT_CONSTEVAL +# endif +#endif + +#ifndef FMT_USE_NONTYPE_TEMPLATE_ARGS +# if defined(__cpp_nontype_template_args) && \ + ((FMT_GCC_VERSION >= 903 && FMT_CPLUSPLUS >= 201709L) || \ + __cpp_nontype_template_args >= 201911L) && \ + !defined(__NVCOMPILER) && !defined(__LCC__) +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +# else +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 +# endif +#endif + +// Enable minimal optimizations for more compact code in debug mode. +FMT_GCC_PRAGMA("GCC push_options") +#if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) && \ + !defined(__CUDACC__) +FMT_GCC_PRAGMA("GCC optimize(\"Og\")") +#endif + +FMT_BEGIN_NAMESPACE + +// Implementations of enable_if_t and other metafunctions for older systems. 
+template +using enable_if_t = typename std::enable_if::type; +template +using conditional_t = typename std::conditional::type; +template using bool_constant = std::integral_constant; +template +using remove_reference_t = typename std::remove_reference::type; +template +using remove_const_t = typename std::remove_const::type; +template +using remove_cvref_t = typename std::remove_cv>::type; +template struct type_identity { using type = T; }; +template using type_identity_t = typename type_identity::type; +template +using underlying_t = typename std::underlying_type::type; + +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; +template +struct is_contiguous> : std::true_type {}; + +struct monostate { + constexpr monostate() {} +}; + +// An enable_if helper to be used in template parameters which results in much +// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed +// to workaround a bug in MSVC 2019 (see #1140 and #1186). +#ifdef FMT_DOC +# define FMT_ENABLE_IF(...) +#else +# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0 +#endif + +// This is defined in core.h instead of format.h to avoid injecting in std. +// It is a template to avoid undesirable implicit conversions to std::byte. +#ifdef __cpp_lib_byte +template ::value)> +inline auto format_as(T b) -> unsigned char { + return static_cast(b); +} +#endif + +namespace detail { +// Suppresses "unused variable" warnings with the method described in +// https://herbsutter.com/2009/10/18/mailbag-shutting-up-compiler-warnings/. +// (void)var does not work on many Intel compilers. +template FMT_CONSTEXPR void ignore_unused(const T&...) {} + +constexpr FMT_INLINE auto is_constant_evaluated( + bool default_value = false) noexcept -> bool { +// Workaround for incompatibility between libstdc++ consteval-based +// std::is_constant_evaluated() implementation and clang-14. +// https://github.com/fmtlib/fmt/issues/3247 +#if FMT_CPLUSPLUS >= 202002L && defined(_GLIBCXX_RELEASE) && \ + _GLIBCXX_RELEASE >= 12 && \ + (FMT_CLANG_VERSION >= 1400 && FMT_CLANG_VERSION < 1500) + ignore_unused(default_value); + return __builtin_is_constant_evaluated(); +#elif defined(__cpp_lib_is_constant_evaluated) + ignore_unused(default_value); + return std::is_constant_evaluated(); +#else + return default_value; +#endif +} + +// Suppresses "conditional expression is constant" warnings. +template constexpr FMT_INLINE auto const_check(T value) -> T { + return value; +} + +FMT_NORETURN FMT_API void assert_fail(const char* file, int line, + const char* message); + +#ifndef FMT_ASSERT +# ifdef NDEBUG +// FMT_ASSERT is not empty to avoid -Wempty-body. +# define FMT_ASSERT(condition, message) \ + fmt::detail::ignore_unused((condition), (message)) +# else +# define FMT_ASSERT(condition, message) \ + ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ + ? (void)0 \ + : fmt::detail::assert_fail(__FILE__, __LINE__, (message))) +# endif +#endif + +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) +template +using std_string_view = std::experimental::basic_string_view; +#else +template struct std_string_view {}; +#endif + +#ifdef FMT_USE_INT128 +// Do nothing. +#elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ + !(FMT_CLANG_VERSION && FMT_MSC_VERSION) +# define FMT_USE_INT128 1 +using int128_opt = __int128_t; // An optional native 128-bit integer. 
+using uint128_opt = __uint128_t; +template inline auto convert_for_visit(T value) -> T { + return value; +} +#else +# define FMT_USE_INT128 0 +#endif +#if !FMT_USE_INT128 +enum class int128_opt {}; +enum class uint128_opt {}; +// Reduce template instantiations. +template auto convert_for_visit(T) -> monostate { return {}; } +#endif + +// Casts a nonnegative integer to unsigned. +template +FMT_CONSTEXPR auto to_unsigned(Int value) -> + typename std::make_unsigned::type { + FMT_ASSERT(std::is_unsigned::value || value >= 0, "negative value"); + return static_cast::type>(value); +} + +FMT_CONSTEXPR inline auto is_utf8() -> bool { + FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char section[] = "\u00A7"; + + // Avoid buggy sign extensions in MSVC's constant evaluation mode (#2297). + using uchar = unsigned char; + return FMT_UNICODE || (sizeof(section) == 3 && uchar(section[0]) == 0xC2 && + uchar(section[1]) == 0xA7); +} +} // namespace detail + +/** + An implementation of ``std::basic_string_view`` for pre-C++17. It provides a + subset of the API. ``fmt::basic_string_view`` is used for format strings even + if ``std::string_view`` is available to prevent issues when a library is + compiled with a different ``-std`` option than the client code (which is not + recommended). + */ +FMT_EXPORT +template class basic_string_view { + private: + const Char* data_; + size_t size_; + + public: + using value_type = Char; + using iterator = const Char*; + + constexpr basic_string_view() noexcept : data_(nullptr), size_(0) {} + + /** Constructs a string reference object from a C string and a size. */ + constexpr basic_string_view(const Char* s, size_t count) noexcept + : data_(s), size_(count) {} + + /** + \rst + Constructs a string reference object from a C string computing + the size with ``std::char_traits::length``. + \endrst + */ + FMT_CONSTEXPR_CHAR_TRAITS + FMT_INLINE + basic_string_view(const Char* s) + : data_(s), + size_(detail::const_check(std::is_same::value && + !detail::is_constant_evaluated(true)) + ? std::strlen(reinterpret_cast(s)) + : std::char_traits::length(s)) {} + + /** Constructs a string reference from a ``std::basic_string`` object. */ + template + FMT_CONSTEXPR basic_string_view( + const std::basic_string& s) noexcept + : data_(s.data()), size_(s.size()) {} + + template >::value)> + FMT_CONSTEXPR basic_string_view(S s) noexcept + : data_(s.data()), size_(s.size()) {} + + /** Returns a pointer to the string data. */ + constexpr auto data() const noexcept -> const Char* { return data_; } + + /** Returns the string size. */ + constexpr auto size() const noexcept -> size_t { return size_; } + + constexpr auto begin() const noexcept -> iterator { return data_; } + constexpr auto end() const noexcept -> iterator { return data_ + size_; } + + constexpr auto operator[](size_t pos) const noexcept -> const Char& { + return data_[pos]; + } + + FMT_CONSTEXPR void remove_prefix(size_t n) noexcept { + data_ += n; + size_ -= n; + } + + FMT_CONSTEXPR_CHAR_TRAITS bool starts_with( + basic_string_view sv) const noexcept { + return size_ >= sv.size_ && + std::char_traits::compare(data_, sv.data_, sv.size_) == 0; + } + FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept { + return size_ >= 1 && std::char_traits::eq(*data_, c); + } + FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const { + return starts_with(basic_string_view(s)); + } + + // Lexicographically compare this string reference to other. 
+ FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { + size_t str_size = size_ < other.size_ ? size_ : other.size_; + int result = std::char_traits::compare(data_, other.data_, str_size); + if (result == 0) + result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); + return result; + } + + FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, + basic_string_view rhs) + -> bool { + return lhs.compare(rhs) == 0; + } + friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) != 0; + } + friend auto operator<(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) < 0; + } + friend auto operator<=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) <= 0; + } + friend auto operator>(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) > 0; + } + friend auto operator>=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) >= 0; + } +}; + +FMT_EXPORT +using string_view = basic_string_view; + +/** Specifies if ``T`` is a character type. Can be specialized by users. */ +FMT_EXPORT +template struct is_char : std::false_type {}; +template <> struct is_char : std::true_type {}; + +namespace detail { + +// A base class for compile-time strings. +struct compile_string {}; + +template +struct is_compile_string : std::is_base_of {}; + +template ::value)> +FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { + return s; +} +template +inline auto to_string_view(const std::basic_string& s) + -> basic_string_view { + return s; +} +template +constexpr auto to_string_view(basic_string_view s) + -> basic_string_view { + return s; +} +template >::value)> +inline auto to_string_view(std_string_view s) -> basic_string_view { + return s; +} +template ::value)> +constexpr auto to_string_view(const S& s) + -> basic_string_view { + return basic_string_view(s); +} +void to_string_view(...); + +// Specifies whether S is a string type convertible to fmt::basic_string_view. +// It should be a constexpr function but MSVC 2017 fails to compile it in +// enable_if and MSVC 2015 fails to compile it as an alias template. +// ADL is intentionally disabled as to_string_view is not an extension point. +template +struct is_string + : std::is_class()))> {}; + +template struct char_t_impl {}; +template struct char_t_impl::value>> { + using result = decltype(to_string_view(std::declval())); + using type = typename result::value_type; +}; + +enum class type { + none_type, + // Integer types should go first, + int_type, + uint_type, + long_long_type, + ulong_long_type, + int128_type, + uint128_type, + bool_type, + char_type, + last_integer_type = char_type, + // followed by floating-point types. + float_type, + double_type, + long_double_type, + last_numeric_type = long_double_type, + cstring_type, + string_type, + pointer_type, + custom_type +}; + +// Maps core type T to the corresponding type enum constant. 
+template +struct type_constant : std::integral_constant {}; + +#define FMT_TYPE_CONSTANT(Type, constant) \ + template \ + struct type_constant \ + : std::integral_constant {} + +FMT_TYPE_CONSTANT(int, int_type); +FMT_TYPE_CONSTANT(unsigned, uint_type); +FMT_TYPE_CONSTANT(long long, long_long_type); +FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); +FMT_TYPE_CONSTANT(int128_opt, int128_type); +FMT_TYPE_CONSTANT(uint128_opt, uint128_type); +FMT_TYPE_CONSTANT(bool, bool_type); +FMT_TYPE_CONSTANT(Char, char_type); +FMT_TYPE_CONSTANT(float, float_type); +FMT_TYPE_CONSTANT(double, double_type); +FMT_TYPE_CONSTANT(long double, long_double_type); +FMT_TYPE_CONSTANT(const Char*, cstring_type); +FMT_TYPE_CONSTANT(basic_string_view, string_type); +FMT_TYPE_CONSTANT(const void*, pointer_type); + +constexpr bool is_integral_type(type t) { + return t > type::none_type && t <= type::last_integer_type; +} +constexpr bool is_arithmetic_type(type t) { + return t > type::none_type && t <= type::last_numeric_type; +} + +constexpr auto set(type rhs) -> int { return 1 << static_cast(rhs); } +constexpr auto in(type t, int set) -> bool { + return ((set >> static_cast(t)) & 1) != 0; +} + +// Bitsets of types. +enum { + sint_set = + set(type::int_type) | set(type::long_long_type) | set(type::int128_type), + uint_set = set(type::uint_type) | set(type::ulong_long_type) | + set(type::uint128_type), + bool_set = set(type::bool_type), + char_set = set(type::char_type), + float_set = set(type::float_type) | set(type::double_type) | + set(type::long_double_type), + string_set = set(type::string_type), + cstring_set = set(type::cstring_type), + pointer_set = set(type::pointer_type) +}; + +FMT_NORETURN FMT_API void throw_format_error(const char* message); + +struct error_handler { + constexpr error_handler() = default; + + // This function is intentionally not constexpr to give a compile-time error. + FMT_NORETURN void on_error(const char* message) { + throw_format_error(message); + } +}; +} // namespace detail + +/** Throws ``format_error`` with a given message. */ +using detail::throw_format_error; + +/** String's character type. */ +template using char_t = typename detail::char_t_impl::type; + +/** + \rst + Parsing context consisting of a format string range being parsed and an + argument counter for automatic indexing. + You can use the ``format_parse_context`` type alias for ``char`` instead. + \endrst + */ +FMT_EXPORT +template class basic_format_parse_context { + private: + basic_string_view format_str_; + int next_arg_id_; + + FMT_CONSTEXPR void do_check_arg_id(int id); + + public: + using char_type = Char; + using iterator = const Char*; + + explicit constexpr basic_format_parse_context( + basic_string_view format_str, int next_arg_id = 0) + : format_str_(format_str), next_arg_id_(next_arg_id) {} + + /** + Returns an iterator to the beginning of the format string range being + parsed. + */ + constexpr auto begin() const noexcept -> iterator { + return format_str_.begin(); + } + + /** + Returns an iterator past the end of the format string range being parsed. + */ + constexpr auto end() const noexcept -> iterator { return format_str_.end(); } + + /** Advances the begin iterator to ``it``. */ + FMT_CONSTEXPR void advance_to(iterator it) { + format_str_.remove_prefix(detail::to_unsigned(it - begin())); + } + + /** + Reports an error if using the manual argument indexing; otherwise returns + the next argument index and switches to the automatic indexing. 
+ */ + FMT_CONSTEXPR auto next_arg_id() -> int { + if (next_arg_id_ < 0) { + detail::throw_format_error( + "cannot switch from manual to automatic argument indexing"); + return 0; + } + int id = next_arg_id_++; + do_check_arg_id(id); + return id; + } + + /** + Reports an error if using the automatic argument indexing; otherwise + switches to the manual indexing. + */ + FMT_CONSTEXPR void check_arg_id(int id) { + if (next_arg_id_ > 0) { + detail::throw_format_error( + "cannot switch from automatic to manual argument indexing"); + return; + } + next_arg_id_ = -1; + do_check_arg_id(id); + } + FMT_CONSTEXPR void check_arg_id(basic_string_view) {} + FMT_CONSTEXPR void check_dynamic_spec(int arg_id); +}; + +FMT_EXPORT +using format_parse_context = basic_format_parse_context; + +namespace detail { +// A parse context with extra data used only in compile-time checks. +template +class compile_parse_context : public basic_format_parse_context { + private: + int num_args_; + const type* types_; + using base = basic_format_parse_context; + + public: + explicit FMT_CONSTEXPR compile_parse_context( + basic_string_view format_str, int num_args, const type* types, + int next_arg_id = 0) + : base(format_str, next_arg_id), num_args_(num_args), types_(types) {} + + constexpr auto num_args() const -> int { return num_args_; } + constexpr auto arg_type(int id) const -> type { return types_[id]; } + + FMT_CONSTEXPR auto next_arg_id() -> int { + int id = base::next_arg_id(); + if (id >= num_args_) throw_format_error("argument not found"); + return id; + } + + FMT_CONSTEXPR void check_arg_id(int id) { + base::check_arg_id(id); + if (id >= num_args_) throw_format_error("argument not found"); + } + using base::check_arg_id; + + FMT_CONSTEXPR void check_dynamic_spec(int arg_id) { + detail::ignore_unused(arg_id); +#if !defined(__LCC__) + if (arg_id < num_args_ && types_ && !is_integral_type(types_[arg_id])) + throw_format_error("width/precision is not integer"); +#endif + } +}; + +// Extracts a reference to the container from back_insert_iterator. +template +inline auto get_container(std::back_insert_iterator it) + -> Container& { + using base = std::back_insert_iterator; + struct accessor : base { + accessor(base b) : base(b) {} + using base::container; + }; + return *accessor(it).container; +} + +template +FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) + -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template , U>::value&& is_char::value)> +FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { + if (is_constant_evaluated()) return copy_str(begin, end, out); + auto size = to_unsigned(end - begin); + if (size > 0) memcpy(out, begin, size * sizeof(U)); + return out + size; +} + +/** + \rst + A contiguous memory buffer with an optional growing ability. It is an internal + class and shouldn't be used directly, only via `~fmt::basic_memory_buffer`. + \endrst + */ +template class buffer { + private: + T* ptr_; + size_t size_; + size_t capacity_; + + protected: + // Don't initialize ptr_ since it is not accessed to save a few cycles. + FMT_MSC_WARNING(suppress : 26495) + buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {} + + FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept + : ptr_(p), size_(sz), capacity_(cap) {} + + FMT_CONSTEXPR20 ~buffer() = default; + buffer(buffer&&) = default; + + /** Sets the buffer data and capacity. 
*/ + FMT_CONSTEXPR void set(T* buf_data, size_t buf_capacity) noexcept { + ptr_ = buf_data; + capacity_ = buf_capacity; + } + + /** Increases the buffer capacity to hold at least *capacity* elements. */ + virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0; + + public: + using value_type = T; + using const_reference = const T&; + + buffer(const buffer&) = delete; + void operator=(const buffer&) = delete; + + FMT_INLINE auto begin() noexcept -> T* { return ptr_; } + FMT_INLINE auto end() noexcept -> T* { return ptr_ + size_; } + + FMT_INLINE auto begin() const noexcept -> const T* { return ptr_; } + FMT_INLINE auto end() const noexcept -> const T* { return ptr_ + size_; } + + /** Returns the size of this buffer. */ + constexpr auto size() const noexcept -> size_t { return size_; } + + /** Returns the capacity of this buffer. */ + constexpr auto capacity() const noexcept -> size_t { return capacity_; } + + /** Returns a pointer to the buffer data (not null-terminated). */ + FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; } + FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; } + + /** Clears this buffer. */ + void clear() { size_ = 0; } + + // Tries resizing the buffer to contain *count* elements. If T is a POD type + // the new elements may not be initialized. + FMT_CONSTEXPR20 void try_resize(size_t count) { + try_reserve(count); + size_ = count <= capacity_ ? count : capacity_; + } + + // Tries increasing the buffer capacity to *new_capacity*. It can increase the + // capacity by a smaller amount than requested but guarantees there is space + // for at least one additional element either by increasing the capacity or by + // flushing the buffer if it is full. + FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) { + if (new_capacity > capacity_) grow(new_capacity); + } + + FMT_CONSTEXPR20 void push_back(const T& value) { + try_reserve(size_ + 1); + ptr_[size_++] = value; + } + + /** Appends data to the end of the buffer. */ + template void append(const U* begin, const U* end); + + template FMT_CONSTEXPR auto operator[](Idx index) -> T& { + return ptr_[index]; + } + template + FMT_CONSTEXPR auto operator[](Idx index) const -> const T& { + return ptr_[index]; + } +}; + +struct buffer_traits { + explicit buffer_traits(size_t) {} + auto count() const -> size_t { return 0; } + auto limit(size_t size) -> size_t { return size; } +}; + +class fixed_buffer_traits { + private: + size_t count_ = 0; + size_t limit_; + + public: + explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} + auto count() const -> size_t { return count_; } + auto limit(size_t size) -> size_t { + size_t n = limit_ > count_ ? limit_ - count_ : 0; + count_ += size; + return size < n ? size : n; + } +}; + +// A buffer that writes to an output iterator when flushed. 
+template +class iterator_buffer final : public Traits, public buffer { + private: + OutputIt out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + protected: + FMT_CONSTEXPR20 void grow(size_t) override { + if (this->size() == buffer_size) flush(); + } + + void flush() { + auto size = this->size(); + this->clear(); + out_ = copy_str(data_, data_ + this->limit(size), out_); + } + + public: + explicit iterator_buffer(OutputIt out, size_t n = buffer_size) + : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} + iterator_buffer(iterator_buffer&& other) + : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} + ~iterator_buffer() { flush(); } + + auto out() -> OutputIt { + flush(); + return out_; + } + auto count() const -> size_t { return Traits::count() + this->size(); } +}; + +template +class iterator_buffer final + : public fixed_buffer_traits, + public buffer { + private: + T* out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + protected: + FMT_CONSTEXPR20 void grow(size_t) override { + if (this->size() == this->capacity()) flush(); + } + + void flush() { + size_t n = this->limit(this->size()); + if (this->data() == out_) { + out_ += n; + this->set(data_, buffer_size); + } + this->clear(); + } + + public: + explicit iterator_buffer(T* out, size_t n = buffer_size) + : fixed_buffer_traits(n), buffer(out, 0, n), out_(out) {} + iterator_buffer(iterator_buffer&& other) + : fixed_buffer_traits(other), + buffer(std::move(other)), + out_(other.out_) { + if (this->data() != out_) { + this->set(data_, buffer_size); + this->clear(); + } + } + ~iterator_buffer() { flush(); } + + auto out() -> T* { + flush(); + return out_; + } + auto count() const -> size_t { + return fixed_buffer_traits::count() + this->size(); + } +}; + +template class iterator_buffer final : public buffer { + protected: + FMT_CONSTEXPR20 void grow(size_t) override {} + + public: + explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} + + auto out() -> T* { return &*this->end(); } +}; + +// A buffer that writes to a container with the contiguous storage. +template +class iterator_buffer, + enable_if_t::value, + typename Container::value_type>> + final : public buffer { + private: + Container& container_; + + protected: + FMT_CONSTEXPR20 void grow(size_t capacity) override { + container_.resize(capacity); + this->set(&container_[0], capacity); + } + + public: + explicit iterator_buffer(Container& c) + : buffer(c.size()), container_(c) {} + explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) + : iterator_buffer(get_container(out)) {} + + auto out() -> std::back_insert_iterator { + return std::back_inserter(container_); + } +}; + +// A buffer that counts the number of code units written discarding the output. +template class counting_buffer final : public buffer { + private: + enum { buffer_size = 256 }; + T data_[buffer_size]; + size_t count_ = 0; + + protected: + FMT_CONSTEXPR20 void grow(size_t) override { + if (this->size() != buffer_size) return; + count_ += this->size(); + this->clear(); + } + + public: + counting_buffer() : buffer(data_, 0, buffer_size) {} + + auto count() -> size_t { return count_ + this->size(); } +}; +} // namespace detail + +template +FMT_CONSTEXPR void basic_format_parse_context::do_check_arg_id(int id) { + // Argument id is only checked at compile-time during parsing because + // formatting has its own validation. 
+ if (detail::is_constant_evaluated() && + (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { + using context = detail::compile_parse_context; + if (id >= static_cast(this)->num_args()) + detail::throw_format_error("argument not found"); + } +} + +template +FMT_CONSTEXPR void basic_format_parse_context::check_dynamic_spec( + int arg_id) { + if (detail::is_constant_evaluated() && + (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { + using context = detail::compile_parse_context; + static_cast(this)->check_dynamic_spec(arg_id); + } +} + +FMT_EXPORT template class basic_format_arg; +FMT_EXPORT template class basic_format_args; +FMT_EXPORT template class dynamic_format_arg_store; + +// A formatter for objects of type T. +FMT_EXPORT +template +struct formatter { + // A deleted default constructor indicates a disabled formatter. + formatter() = delete; +}; + +// Specifies if T has an enabled formatter specialization. A type can be +// formattable even if it doesn't have a formatter e.g. via a conversion. +template +using has_formatter = + std::is_constructible>; + +// An output iterator that appends to a buffer. +// It is used to reduce symbol sizes for the common case. +class appender : public std::back_insert_iterator> { + using base = std::back_insert_iterator>; + + public: + using std::back_insert_iterator>::back_insert_iterator; + appender(base it) noexcept : base(it) {} + FMT_UNCHECKED_ITERATOR(appender); + + auto operator++() noexcept -> appender& { return *this; } + auto operator++(int) noexcept -> appender { return *this; } +}; + +namespace detail { + +template +constexpr auto has_const_formatter_impl(T*) + -> decltype(typename Context::template formatter_type().format( + std::declval(), std::declval()), + true) { + return true; +} +template +constexpr auto has_const_formatter_impl(...) -> bool { + return false; +} +template +constexpr auto has_const_formatter() -> bool { + return has_const_formatter_impl(static_cast(nullptr)); +} + +template +using buffer_appender = conditional_t::value, appender, + std::back_insert_iterator>>; + +// Maps an output iterator to a buffer. +template +auto get_buffer(OutputIt out) -> iterator_buffer { + return iterator_buffer(out); +} +template , Buf>::value)> +auto get_buffer(std::back_insert_iterator out) -> buffer& { + return get_container(out); +} + +template +FMT_INLINE auto get_iterator(Buf& buf, OutputIt) -> decltype(buf.out()) { + return buf.out(); +} +template +auto get_iterator(buffer&, OutputIt out) -> OutputIt { + return out; +} + +struct view {}; + +template struct named_arg : view { + const Char* name; + const T& value; + named_arg(const Char* n, const T& v) : name(n), value(v) {} +}; + +template struct named_arg_info { + const Char* name; + int id; +}; + +template +struct arg_data { + // args_[0].named_args points to named_args_ to avoid bloating format_args. + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)]; + named_arg_info named_args_[NUM_NAMED_ARGS]; + + template + arg_data(const U&... init) : args_{T(named_args_, NUM_NAMED_ARGS), init...} {} + arg_data(const arg_data& other) = delete; + auto args() const -> const T* { return args_ + 1; } + auto named_args() -> named_arg_info* { return named_args_; } +}; + +template +struct arg_data { + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[NUM_ARGS != 0 ? NUM_ARGS : +1]; + + template + FMT_CONSTEXPR FMT_INLINE arg_data(const U&... 
init) : args_{init...} {} + FMT_CONSTEXPR FMT_INLINE auto args() const -> const T* { return args_; } + FMT_CONSTEXPR FMT_INLINE auto named_args() -> std::nullptr_t { + return nullptr; + } +}; + +template +inline void init_named_args(named_arg_info*, int, int) {} + +template struct is_named_arg : std::false_type {}; +template struct is_statically_named_arg : std::false_type {}; + +template +struct is_named_arg> : std::true_type {}; + +template ::value)> +void init_named_args(named_arg_info* named_args, int arg_count, + int named_arg_count, const T&, const Tail&... args) { + init_named_args(named_args, arg_count + 1, named_arg_count, args...); +} + +template ::value)> +void init_named_args(named_arg_info* named_args, int arg_count, + int named_arg_count, const T& arg, const Tail&... args) { + named_args[named_arg_count++] = {arg.name, arg_count}; + init_named_args(named_args, arg_count + 1, named_arg_count, args...); +} + +template +FMT_CONSTEXPR FMT_INLINE void init_named_args(std::nullptr_t, int, int, + const Args&...) {} + +template constexpr auto count() -> size_t { return B ? 1 : 0; } +template constexpr auto count() -> size_t { + return (B1 ? 1 : 0) + count(); +} + +template constexpr auto count_named_args() -> size_t { + return count::value...>(); +} + +template +constexpr auto count_statically_named_args() -> size_t { + return count::value...>(); +} + +struct unformattable {}; +struct unformattable_char : unformattable {}; +struct unformattable_pointer : unformattable {}; + +template struct string_value { + const Char* data; + size_t size; +}; + +template struct named_arg_value { + const named_arg_info* data; + size_t size; +}; + +template struct custom_value { + using parse_context = typename Context::parse_context_type; + void* value; + void (*format)(void* arg, parse_context& parse_ctx, Context& ctx); +}; + +// A formatting argument value. 
+template class value { + public: + using char_type = typename Context::char_type; + + union { + monostate no_value; + int int_value; + unsigned uint_value; + long long long_long_value; + unsigned long long ulong_long_value; + int128_opt int128_value; + uint128_opt uint128_value; + bool bool_value; + char_type char_value; + float float_value; + double double_value; + long double long_double_value; + const void* pointer; + string_value string; + custom_value custom; + named_arg_value named_args; + }; + + constexpr FMT_INLINE value() : no_value() {} + constexpr FMT_INLINE value(int val) : int_value(val) {} + constexpr FMT_INLINE value(unsigned val) : uint_value(val) {} + constexpr FMT_INLINE value(long long val) : long_long_value(val) {} + constexpr FMT_INLINE value(unsigned long long val) : ulong_long_value(val) {} + FMT_INLINE value(int128_opt val) : int128_value(val) {} + FMT_INLINE value(uint128_opt val) : uint128_value(val) {} + constexpr FMT_INLINE value(float val) : float_value(val) {} + constexpr FMT_INLINE value(double val) : double_value(val) {} + FMT_INLINE value(long double val) : long_double_value(val) {} + constexpr FMT_INLINE value(bool val) : bool_value(val) {} + constexpr FMT_INLINE value(char_type val) : char_value(val) {} + FMT_CONSTEXPR FMT_INLINE value(const char_type* val) { + string.data = val; + if (is_constant_evaluated()) string.size = {}; + } + FMT_CONSTEXPR FMT_INLINE value(basic_string_view val) { + string.data = val.data(); + string.size = val.size(); + } + FMT_INLINE value(const void* val) : pointer(val) {} + FMT_INLINE value(const named_arg_info* args, size_t size) + : named_args{args, size} {} + + template FMT_CONSTEXPR20 FMT_INLINE value(T& val) { + using value_type = remove_const_t; + custom.value = const_cast(std::addressof(val)); + // Get the formatter type through the context to allow different contexts + // have different extension points, e.g. `formatter` for `format` and + // `printf_formatter` for `printf`. + custom.format = format_custom_arg< + value_type, typename Context::template formatter_type>; + } + value(unformattable); + value(unformattable_char); + value(unformattable_pointer); + + private: + // Formats an argument of a custom type, such as a user-defined class. + template + static void format_custom_arg(void* arg, + typename Context::parse_context_type& parse_ctx, + Context& ctx) { + auto f = Formatter(); + parse_ctx.advance_to(f.parse(parse_ctx)); + using qualified_type = + conditional_t(), const T, T>; + ctx.advance_to(f.format(*static_cast(arg), ctx)); + } +}; + +// To minimize the number of types we need to deal with, long is translated +// either to int or to long long depending on its size. +enum { long_short = sizeof(long) == sizeof(int) }; +using long_type = conditional_t; +using ulong_type = conditional_t; + +template struct format_as_result { + template ::value || std::is_class::value)> + static auto map(U*) -> decltype(format_as(std::declval())); + static auto map(...) -> void; + + using type = decltype(map(static_cast(nullptr))); +}; +template using format_as_t = typename format_as_result::type; + +template +struct has_format_as + : bool_constant, void>::value> {}; + +// Maps formatting arguments to core types. +// arg_mapper reports errors by returning unformattable instead of using +// static_assert because it's used in the is_formattable trait. 
+template struct arg_mapper { + using char_type = typename Context::char_type; + + FMT_CONSTEXPR FMT_INLINE auto map(signed char val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned char val) -> unsigned { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(short val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned short val) -> unsigned { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(int val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned val) -> unsigned { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(long val) -> long_type { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned long val) -> ulong_type { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(long long val) -> long long { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned long long val) + -> unsigned long long { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(int128_opt val) -> int128_opt { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(uint128_opt val) -> uint128_opt { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(bool val) -> bool { return val; } + + template ::value || + std::is_same::value)> + FMT_CONSTEXPR FMT_INLINE auto map(T val) -> char_type { + return val; + } + template ::value || +#ifdef __cpp_char8_t + std::is_same::value || +#endif + std::is_same::value || + std::is_same::value) && + !std::is_same::value, + int> = 0> + FMT_CONSTEXPR FMT_INLINE auto map(T) -> unformattable_char { + return {}; + } + + FMT_CONSTEXPR FMT_INLINE auto map(float val) -> float { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(double val) -> double { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(long double val) -> long double { + return val; + } + + FMT_CONSTEXPR FMT_INLINE auto map(char_type* val) -> const char_type* { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(const char_type* val) -> const char_type* { + return val; + } + template ::value && !std::is_pointer::value && + std::is_same>::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> basic_string_view { + return to_string_view(val); + } + template ::value && !std::is_pointer::value && + !std::is_same>::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T&) -> unformattable_char { + return {}; + } + + FMT_CONSTEXPR FMT_INLINE auto map(void* val) -> const void* { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(const void* val) -> const void* { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(std::nullptr_t val) -> const void* { + return val; + } + + // Use SFINAE instead of a const T* parameter to avoid a conflict with the + // array overload. + template < + typename T, + FMT_ENABLE_IF( + std::is_pointer::value || std::is_member_pointer::value || + std::is_function::type>::value || + (std::is_array::value && + !std::is_convertible::value))> + FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer { + return {}; + } + + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T (&values)[N]) -> const T (&)[N] { + return values; + } + + // Only map owning types because mapping views can be unsafe. 
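+  // For example, if format_as returned a view into a temporary, the pointer
+  // stored in the argument could dangle by format time; restricting the
+  // mapped result to arithmetic types below sidesteps that. Illustrative
+  // usage of the format_as extension point:
+  //
+  //   enum class mode { fast };
+  //   auto format_as(mode m) -> int { return static_cast<int>(m); }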
+ template , + FMT_ENABLE_IF(std::is_arithmetic::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) -> decltype(this->map(U())) { + return map(format_as(val)); + } + + template > + struct formattable : bool_constant() || + (has_formatter::value && + !std::is_const::value)> {}; + + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto do_map(T& val) -> T& { + return val; + } + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto do_map(T&) -> unformattable { + return {}; + } + + template , + FMT_ENABLE_IF((std::is_class::value || std::is_enum::value || + std::is_union::value) && + !is_string::value && !is_char::value && + !is_named_arg::value && + !std::is_arithmetic>::value)> + FMT_CONSTEXPR FMT_INLINE auto map(T& val) -> decltype(this->do_map(val)) { + return do_map(val); + } + + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) + -> decltype(this->map(named_arg.value)) { + return map(named_arg.value); + } + + auto map(...) -> unformattable { return {}; } +}; + +// A type constant after applying arg_mapper. +template +using mapped_type_constant = + type_constant().map(std::declval())), + typename Context::char_type>; + +enum { packed_arg_bits = 4 }; +// Maximum number of arguments with packed types. +enum { max_packed_args = 62 / packed_arg_bits }; +enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; +enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; + +template +auto copy_str(InputIt begin, InputIt end, appender out) -> appender { + get_container(out).append(begin, end); + return out; +} +template +auto copy_str(InputIt begin, InputIt end, + std::back_insert_iterator out) + -> std::back_insert_iterator { + get_container(out).append(begin, end); + return out; +} + +template +FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { + return detail::copy_str(rng.begin(), rng.end(), out); +} + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { using type = void; }; +template using void_t = typename void_t_impl::type; +#else +template using void_t = void; +#endif + +template +struct is_output_iterator : std::false_type {}; + +template +struct is_output_iterator< + It, T, + void_t::iterator_category, + decltype(*std::declval() = std::declval())>> + : std::true_type {}; + +template struct is_back_insert_iterator : std::false_type {}; +template +struct is_back_insert_iterator> + : std::true_type {}; + +// A type-erased reference to an std::locale to avoid a heavy include. +class locale_ref { + private: + const void* locale_; // A type-erased pointer to std::locale. 
+ + public: + constexpr FMT_INLINE locale_ref() : locale_(nullptr) {} + template explicit locale_ref(const Locale& loc); + + explicit operator bool() const noexcept { return locale_ != nullptr; } + + template auto get() const -> Locale; +}; + +template constexpr auto encode_types() -> unsigned long long { + return 0; +} + +template +constexpr auto encode_types() -> unsigned long long { + return static_cast(mapped_type_constant::value) | + (encode_types() << packed_arg_bits); +} + +#if defined(__cpp_if_constexpr) +// This type is intentionally undefined, only used for errors +template struct type_is_unformattable_for; +#endif + +template +FMT_CONSTEXPR FMT_INLINE auto make_arg(T& val) -> value { + using arg_type = remove_cvref_t().map(val))>; + + constexpr bool formattable_char = + !std::is_same::value; + static_assert(formattable_char, "Mixing character types is disallowed."); + + // Formatting of arbitrary pointers is disallowed. If you want to format a + // pointer cast it to `void*` or `const void*`. In particular, this forbids + // formatting of `[const] volatile char*` printed as bool by iostreams. + constexpr bool formattable_pointer = + !std::is_same::value; + static_assert(formattable_pointer, + "Formatting of non-void pointers is disallowed."); + + constexpr bool formattable = !std::is_same::value; +#if defined(__cpp_if_constexpr) + if constexpr (!formattable) { + type_is_unformattable_for _; + } +#endif + static_assert( + formattable, + "Cannot format an argument. To make type T formattable provide a " + "formatter specialization: https://fmt.dev/latest/api.html#udt"); + return {arg_mapper().map(val)}; +} + +template +FMT_CONSTEXPR auto make_arg(T& val) -> basic_format_arg { + auto arg = basic_format_arg(); + arg.type_ = mapped_type_constant::value; + arg.value_ = make_arg(val); + return arg; +} + +template +FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg { + return make_arg(val); +} +} // namespace detail +FMT_BEGIN_EXPORT + +// A formatting argument. It is a trivially copyable/constructible type to +// allow storage in basic_memory_buffer. +template class basic_format_arg { + private: + detail::value value_; + detail::type type_; + + template + friend FMT_CONSTEXPR auto detail::make_arg(T& value) + -> basic_format_arg; + + template + friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, + const basic_format_arg& arg) + -> decltype(vis(0)); + + friend class basic_format_args; + friend class dynamic_format_arg_store; + + using char_type = typename Context::char_type; + + template + friend struct detail::arg_data; + + basic_format_arg(const detail::named_arg_info* args, size_t size) + : value_(args, size) {} + + public: + class handle { + public: + explicit handle(detail::custom_value custom) : custom_(custom) {} + + void format(typename Context::parse_context_type& parse_ctx, + Context& ctx) const { + custom_.format(custom_.value, parse_ctx, ctx); + } + + private: + detail::custom_value custom_; + }; + + constexpr basic_format_arg() : type_(detail::type::none_type) {} + + constexpr explicit operator bool() const noexcept { + return type_ != detail::type::none_type; + } + + auto type() const -> detail::type { return type_; } + + auto is_integral() const -> bool { return detail::is_integral_type(type_); } + auto is_arithmetic() const -> bool { + return detail::is_arithmetic_type(type_); + } +}; + +/** + \rst + Visits an argument dispatching to the appropriate visit method based on + the argument type. 
For example, if the argument type is ``double`` then + ``vis(value)`` will be called with the value of type ``double``. + \endrst + */ +// DEPRECATED! +template +FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( + Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { + switch (arg.type_) { + case detail::type::none_type: + break; + case detail::type::int_type: + return vis(arg.value_.int_value); + case detail::type::uint_type: + return vis(arg.value_.uint_value); + case detail::type::long_long_type: + return vis(arg.value_.long_long_value); + case detail::type::ulong_long_type: + return vis(arg.value_.ulong_long_value); + case detail::type::int128_type: + return vis(detail::convert_for_visit(arg.value_.int128_value)); + case detail::type::uint128_type: + return vis(detail::convert_for_visit(arg.value_.uint128_value)); + case detail::type::bool_type: + return vis(arg.value_.bool_value); + case detail::type::char_type: + return vis(arg.value_.char_value); + case detail::type::float_type: + return vis(arg.value_.float_value); + case detail::type::double_type: + return vis(arg.value_.double_value); + case detail::type::long_double_type: + return vis(arg.value_.long_double_value); + case detail::type::cstring_type: + return vis(arg.value_.string.data); + case detail::type::string_type: + using sv = basic_string_view; + return vis(sv(arg.value_.string.data, arg.value_.string.size)); + case detail::type::pointer_type: + return vis(arg.value_.pointer); + case detail::type::custom_type: + return vis(typename basic_format_arg::handle(arg.value_.custom)); + } + return vis(monostate()); +} + +// Formatting context. +template class basic_format_context { + private: + OutputIt out_; + basic_format_args args_; + detail::locale_ref loc_; + + public: + using iterator = OutputIt; + using format_arg = basic_format_arg; + using format_args = basic_format_args; + using parse_context_type = basic_format_parse_context; + template using formatter_type = formatter; + + /** The character type for the output. */ + using char_type = Char; + + basic_format_context(basic_format_context&&) = default; + basic_format_context(const basic_format_context&) = delete; + void operator=(const basic_format_context&) = delete; + /** + Constructs a ``basic_format_context`` object. References to the arguments + are stored in the object so make sure they have appropriate lifetimes. + */ + constexpr basic_format_context(OutputIt out, format_args ctx_args, + detail::locale_ref loc = {}) + : out_(out), args_(ctx_args), loc_(loc) {} + + constexpr auto arg(int id) const -> format_arg { return args_.get(id); } + FMT_CONSTEXPR auto arg(basic_string_view name) -> format_arg { + return args_.get(name); + } + FMT_CONSTEXPR auto arg_id(basic_string_view name) -> int { + return args_.get_id(name); + } + auto args() const -> const format_args& { return args_; } + + FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; } + void on_error(const char* message) { error_handler().on_error(message); } + + // Returns an iterator to the beginning of the output range. + FMT_CONSTEXPR auto out() -> iterator { return out_; } + + // Advances the begin iterator to ``it``. 
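+  // (A back-insert iterator is stateless: appending already advanced the
+  // underlying container, so the assignment below is skipped for it.)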
+ void advance_to(iterator it) { + if (!detail::is_back_insert_iterator()) out_ = it; + } + + FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } +}; + +template +using buffer_context = + basic_format_context, Char>; +using format_context = buffer_context; + +template +using is_formattable = bool_constant>() + .map(std::declval()))>::value>; + +/** + \rst + An array of references to arguments. It can be implicitly converted into + `~fmt::basic_format_args` for passing into type-erased formatting functions + such as `~fmt::vformat`. + \endrst + */ +template +class format_arg_store +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 + // Workaround a GCC template argument substitution bug. + : public basic_format_args +#endif +{ + private: + static const size_t num_args = sizeof...(Args); + static constexpr size_t num_named_args = detail::count_named_args(); + static const bool is_packed = num_args <= detail::max_packed_args; + + using value_type = conditional_t, + basic_format_arg>; + + detail::arg_data + data_; + + friend class basic_format_args; + + static constexpr unsigned long long desc = + (is_packed ? detail::encode_types() + : detail::is_unpacked_bit | num_args) | + (num_named_args != 0 + ? static_cast(detail::has_named_args_bit) + : 0); + + public: + template + FMT_CONSTEXPR FMT_INLINE format_arg_store(T&... args) + : +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 + basic_format_args(*this), +#endif + data_{detail::make_arg(args)...} { + if (detail::const_check(num_named_args != 0)) + detail::init_named_args(data_.named_args(), 0, 0, args...); + } +}; + +/** + \rst + Constructs a `~fmt::format_arg_store` object that contains references to + arguments and can be implicitly converted to `~fmt::format_args`. `Context` + can be omitted in which case it defaults to `~fmt::format_context`. + See `~fmt::arg` for lifetime considerations. + \endrst + */ +// Arguments are taken by lvalue references to avoid some lifetime issues. +template +constexpr auto make_format_args(T&... args) + -> format_arg_store...> { + return {args...}; +} + +/** + \rst + Returns a named argument to be used in a formatting function. + It should only be used in a call to a formatting function or + `dynamic_format_arg_store::push_back`. + + **Example**:: + + fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23)); + \endrst + */ +template +inline auto arg(const Char* name, const T& arg) -> detail::named_arg { + static_assert(!detail::is_named_arg(), "nested named arguments"); + return {name, arg}; +} +FMT_END_EXPORT + +/** + \rst + A view of a collection of formatting arguments. To avoid lifetime issues it + should only be used as a parameter type in type-erased functions such as + ``vformat``:: + + void vlog(string_view format_str, format_args args); // OK + format_args args = make_format_args(); // Error: dangling reference + \endrst + */ +template class basic_format_args { + public: + using size_type = int; + using format_arg = basic_format_arg; + + private: + // A descriptor that contains information about formatting arguments. + // If the number of arguments is less or equal to max_packed_args then + // argument types are passed in the descriptor. This reduces binary code size + // per formatting function call. + unsigned long long desc_; + union { + // If is_packed() returns true then argument values are stored in values_; + // otherwise they are stored in args_. 
This is done to improve cache + // locality and reduce compiled code size since storing larger objects + // may require more code (at least on x86-64) even if the same amount of + // data is actually copied to stack. It saves ~10% on the bloat test. + const detail::value* values_; + const format_arg* args_; + }; + + constexpr auto is_packed() const -> bool { + return (desc_ & detail::is_unpacked_bit) == 0; + } + auto has_named_args() const -> bool { + return (desc_ & detail::has_named_args_bit) != 0; + } + + FMT_CONSTEXPR auto type(int index) const -> detail::type { + int shift = index * detail::packed_arg_bits; + unsigned int mask = (1 << detail::packed_arg_bits) - 1; + return static_cast((desc_ >> shift) & mask); + } + + constexpr FMT_INLINE basic_format_args(unsigned long long desc, + const detail::value* values) + : desc_(desc), values_(values) {} + constexpr basic_format_args(unsigned long long desc, const format_arg* args) + : desc_(desc), args_(args) {} + + public: + constexpr basic_format_args() : desc_(0), args_(nullptr) {} + + /** + \rst + Constructs a `basic_format_args` object from `~fmt::format_arg_store`. + \endrst + */ + template + constexpr FMT_INLINE basic_format_args( + const format_arg_store& store) + : basic_format_args(format_arg_store::desc, + store.data_.args()) {} + + /** + \rst + Constructs a `basic_format_args` object from + `~fmt::dynamic_format_arg_store`. + \endrst + */ + constexpr FMT_INLINE basic_format_args( + const dynamic_format_arg_store& store) + : basic_format_args(store.get_types(), store.data()) {} + + /** + \rst + Constructs a `basic_format_args` object from a dynamic set of arguments. + \endrst + */ + constexpr basic_format_args(const format_arg* args, int count) + : basic_format_args(detail::is_unpacked_bit | detail::to_unsigned(count), + args) {} + + /** Returns the argument with the specified id. */ + FMT_CONSTEXPR auto get(int id) const -> format_arg { + format_arg arg; + if (!is_packed()) { + if (id < max_size()) arg = args_[id]; + return arg; + } + if (id >= detail::max_packed_args) return arg; + arg.type_ = type(id); + if (arg.type_ == detail::type::none_type) return arg; + arg.value_ = values_[id]; + return arg; + } + + template + auto get(basic_string_view name) const -> format_arg { + int id = get_id(name); + return id >= 0 ? get(id) : format_arg(); + } + + template + auto get_id(basic_string_view name) const -> int { + if (!has_named_args()) return -1; + const auto& named_args = + (is_packed() ? values_[-1] : args_[-1].value_).named_args; + for (size_t i = 0; i < named_args.size; ++i) { + if (named_args.data[i].name == name) return named_args.data[i].id; + } + return -1; + } + + auto max_size() const -> int { + unsigned long long max_packed = detail::max_packed_args; + return static_cast(is_packed() ? max_packed + : desc_ & ~detail::is_unpacked_bit); + } +}; + +/** An alias to ``basic_format_args``. */ +// A separate type would result in shorter symbols but break ABI compatibility +// between clang and gcc on ARM (#1919). +FMT_EXPORT using format_args = basic_format_args; + +// We cannot use enum classes as bit fields because of a gcc bug, so we put them +// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414). +// Additionally, if an underlying type is specified, older gcc incorrectly warns +// that the type is too small. Both bugs are fixed in gcc 9.3. 
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 903 +# define FMT_ENUM_UNDERLYING_TYPE(type) +#else +# define FMT_ENUM_UNDERLYING_TYPE(type) : type +#endif +namespace align { +enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, left, right, center, + numeric}; +} +using align_t = align::type; +namespace sign { +enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, minus, plus, space}; +} +using sign_t = sign::type; + +namespace detail { + +// Workaround an array initialization issue in gcc 4.8. +template struct fill_t { + private: + enum { max_size = 4 }; + Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)}; + unsigned char size_ = 1; + + public: + FMT_CONSTEXPR void operator=(basic_string_view s) { + auto size = s.size(); + FMT_ASSERT(size <= max_size, "invalid fill"); + for (size_t i = 0; i < size; ++i) data_[i] = s[i]; + size_ = static_cast(size); + } + + constexpr auto size() const -> size_t { return size_; } + constexpr auto data() const -> const Char* { return data_; } + + FMT_CONSTEXPR auto operator[](size_t index) -> Char& { return data_[index]; } + FMT_CONSTEXPR auto operator[](size_t index) const -> const Char& { + return data_[index]; + } +}; +} // namespace detail + +enum class presentation_type : unsigned char { + none, + dec, // 'd' + oct, // 'o' + hex_lower, // 'x' + hex_upper, // 'X' + bin_lower, // 'b' + bin_upper, // 'B' + hexfloat_lower, // 'a' + hexfloat_upper, // 'A' + exp_lower, // 'e' + exp_upper, // 'E' + fixed_lower, // 'f' + fixed_upper, // 'F' + general_lower, // 'g' + general_upper, // 'G' + chr, // 'c' + string, // 's' + pointer, // 'p' + debug // '?' +}; + +// Format specifiers for built-in and string types. +template struct format_specs { + int width; + int precision; + presentation_type type; + align_t align : 4; + sign_t sign : 3; + bool alt : 1; // Alternate form ('#'). + bool localized : 1; + detail::fill_t fill; + + constexpr format_specs() + : width(0), + precision(-1), + type(presentation_type::none), + align(align::none), + sign(sign::none), + alt(false), + localized(false) {} +}; + +namespace detail { + +enum class arg_id_kind { none, index, name }; + +// An argument reference. +template struct arg_ref { + FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {} + + FMT_CONSTEXPR explicit arg_ref(int index) + : kind(arg_id_kind::index), val(index) {} + FMT_CONSTEXPR explicit arg_ref(basic_string_view name) + : kind(arg_id_kind::name), val(name) {} + + FMT_CONSTEXPR auto operator=(int idx) -> arg_ref& { + kind = arg_id_kind::index; + val.index = idx; + return *this; + } + + arg_id_kind kind; + union value { + FMT_CONSTEXPR value(int idx = 0) : index(idx) {} + FMT_CONSTEXPR value(basic_string_view n) : name(n) {} + + int index; + basic_string_view name; + } val; +}; + +// Format specifiers with width and precision resolved at formatting rather +// than parsing time to allow reusing the same parsed specifiers with +// different sets of arguments (precompilation of format strings). +template +struct dynamic_format_specs : format_specs { + arg_ref width_ref; + arg_ref precision_ref; +}; + +// Converts a character to ASCII. Returns '\0' on conversion failure. +template ::value)> +constexpr auto to_ascii(Char c) -> char { + return c <= 0xff ? static_cast(c) : '\0'; +} +template ::value)> +constexpr auto to_ascii(Char c) -> char { + return c <= 0xff ? static_cast(c) : '\0'; +} + +// Returns the number of code units in a code point or 1 on error. 
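+// The magic constant below is a packed 2-bit lookup table indexed by the top
+// five bits of the lead byte: len = ((table >> (2 * (c >> 3))) & 0x3) + 1.
+// Worked example (illustrative): for the 3-byte lead byte 0xe2, c >> 3 is 28,
+// the shift is 56, the table entry is 2 and the result is 3; ASCII lead bytes
+// index the all-zero low entries and yield 1.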
+template +FMT_CONSTEXPR auto code_point_length(const Char* begin) -> int { + if (const_check(sizeof(Char) != 1)) return 1; + auto c = static_cast(*begin); + return static_cast((0x3a55000000000000ull >> (2 * (c >> 3))) & 0x3) + 1; +} + +// Return the result via the out param to workaround gcc bug 77539. +template +FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr& out) -> bool { + for (out = first; out != last; ++out) { + if (*out == value) return true; + } + return false; +} + +template <> +inline auto find(const char* first, const char* last, char value, + const char*& out) -> bool { + out = static_cast( + std::memchr(first, value, to_unsigned(last - first))); + return out != nullptr; +} + +// Parses the range [begin, end) as an unsigned integer. This function assumes +// that the range is non-empty and the first character is a digit. +template +FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, + int error_value) noexcept -> int { + FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); + unsigned value = 0, prev = 0; + auto p = begin; + do { + prev = value; + value = value * 10 + unsigned(*p - '0'); + ++p; + } while (p != end && '0' <= *p && *p <= '9'); + auto num_digits = p - begin; + begin = p; + if (num_digits <= std::numeric_limits::digits10) + return static_cast(value); + // Check for overflow. + const unsigned max = to_unsigned((std::numeric_limits::max)()); + return num_digits == std::numeric_limits::digits10 + 1 && + prev * 10ull + unsigned(p[-1] - '0') <= max + ? static_cast(value) + : error_value; +} + +FMT_CONSTEXPR inline auto parse_align(char c) -> align_t { + switch (c) { + case '<': + return align::left; + case '>': + return align::right; + case '^': + return align::center; + } + return align::none; +} + +template constexpr auto is_name_start(Char c) -> bool { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'; +} + +template +FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + Char c = *begin; + if (c >= '0' && c <= '9') { + int index = 0; + constexpr int max = (std::numeric_limits::max)(); + if (c != '0') + index = parse_nonnegative_int(begin, end, max); + else + ++begin; + if (begin == end || (*begin != '}' && *begin != ':')) + throw_format_error("invalid format string"); + else + handler.on_index(index); + return begin; + } + if (!is_name_start(c)) { + throw_format_error("invalid format string"); + return begin; + } + auto it = begin; + do { + ++it; + } while (it != end && (is_name_start(*it) || ('0' <= *it && *it <= '9'))); + handler.on_name({begin, to_unsigned(it - begin)}); + return it; +} + +template +FMT_CONSTEXPR FMT_INLINE auto parse_arg_id(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + FMT_ASSERT(begin != end, ""); + Char c = *begin; + if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler); + handler.on_auto(); + return begin; +} + +template struct dynamic_spec_id_handler { + basic_format_parse_context& ctx; + arg_ref& ref; + + FMT_CONSTEXPR void on_auto() { + int id = ctx.next_arg_id(); + ref = arg_ref(id); + ctx.check_dynamic_spec(id); + } + FMT_CONSTEXPR void on_index(int id) { + ref = arg_ref(id); + ctx.check_arg_id(id); + ctx.check_dynamic_spec(id); + } + FMT_CONSTEXPR void on_name(basic_string_view id) { + ref = arg_ref(id); + ctx.check_arg_id(id); + } +}; + +// Parses [integer | "{" [arg_id] "}"]. 
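+// For example (illustrative), in "{:{}.{}f}" each inner "{}" is parsed here
+// and recorded as an arg_ref that is resolved against the argument list at
+// format time: fmt::format("{:{}.{}f}", 3.14, 10, 2) uses width 10 and
+// precision 2.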
+template +FMT_CONSTEXPR auto parse_dynamic_spec(const Char* begin, const Char* end, + int& value, arg_ref& ref, + basic_format_parse_context& ctx) + -> const Char* { + FMT_ASSERT(begin != end, ""); + if ('0' <= *begin && *begin <= '9') { + int val = parse_nonnegative_int(begin, end, -1); + if (val != -1) + value = val; + else + throw_format_error("number is too big"); + } else if (*begin == '{') { + ++begin; + auto handler = dynamic_spec_id_handler{ctx, ref}; + if (begin != end) begin = parse_arg_id(begin, end, handler); + if (begin != end && *begin == '}') return ++begin; + throw_format_error("invalid format string"); + } + return begin; +} + +template +FMT_CONSTEXPR auto parse_precision(const Char* begin, const Char* end, + int& value, arg_ref& ref, + basic_format_parse_context& ctx) + -> const Char* { + ++begin; + if (begin == end || *begin == '}') { + throw_format_error("invalid precision"); + return begin; + } + return parse_dynamic_spec(begin, end, value, ref, ctx); +} + +enum class state { start, align, sign, hash, zero, width, precision, locale }; + +// Parses standard format specifiers. +template +FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( + const Char* begin, const Char* end, dynamic_format_specs& specs, + basic_format_parse_context& ctx, type arg_type) -> const Char* { + auto c = '\0'; + if (end - begin > 1) { + auto next = to_ascii(begin[1]); + c = parse_align(next) == align::none ? to_ascii(*begin) : '\0'; + } else { + if (begin == end) return begin; + c = to_ascii(*begin); + } + + struct { + state current_state = state::start; + FMT_CONSTEXPR void operator()(state s, bool valid = true) { + if (current_state >= s || !valid) + throw_format_error("invalid format specifier"); + current_state = s; + } + } enter_state; + + using pres = presentation_type; + constexpr auto integral_set = sint_set | uint_set | bool_set | char_set; + struct { + const Char*& begin; + dynamic_format_specs& specs; + type arg_type; + + FMT_CONSTEXPR auto operator()(pres type, int set) -> const Char* { + if (!in(arg_type, set)) throw_format_error("invalid format specifier"); + specs.type = type; + return begin + 1; + } + } parse_presentation_type{begin, specs, arg_type}; + + for (;;) { + switch (c) { + case '<': + case '>': + case '^': + enter_state(state::align); + specs.align = parse_align(c); + ++begin; + break; + case '+': + case '-': + case ' ': + enter_state(state::sign, in(arg_type, sint_set | float_set)); + switch (c) { + case '+': + specs.sign = sign::plus; + break; + case '-': + specs.sign = sign::minus; + break; + case ' ': + specs.sign = sign::space; + break; + } + ++begin; + break; + case '#': + enter_state(state::hash, is_arithmetic_type(arg_type)); + specs.alt = true; + ++begin; + break; + case '0': + enter_state(state::zero); + if (!is_arithmetic_type(arg_type)) + throw_format_error("format specifier requires numeric argument"); + if (specs.align == align::none) { + // Ignore 0 if align is specified for compatibility with std::format. 
+ specs.align = align::numeric; + specs.fill[0] = Char('0'); + } + ++begin; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '{': + enter_state(state::width); + begin = parse_dynamic_spec(begin, end, specs.width, specs.width_ref, ctx); + break; + case '.': + enter_state(state::precision, + in(arg_type, float_set | string_set | cstring_set)); + begin = parse_precision(begin, end, specs.precision, specs.precision_ref, + ctx); + break; + case 'L': + enter_state(state::locale, is_arithmetic_type(arg_type)); + specs.localized = true; + ++begin; + break; + case 'd': + return parse_presentation_type(pres::dec, integral_set); + case 'o': + return parse_presentation_type(pres::oct, integral_set); + case 'x': + return parse_presentation_type(pres::hex_lower, integral_set); + case 'X': + return parse_presentation_type(pres::hex_upper, integral_set); + case 'b': + return parse_presentation_type(pres::bin_lower, integral_set); + case 'B': + return parse_presentation_type(pres::bin_upper, integral_set); + case 'a': + return parse_presentation_type(pres::hexfloat_lower, float_set); + case 'A': + return parse_presentation_type(pres::hexfloat_upper, float_set); + case 'e': + return parse_presentation_type(pres::exp_lower, float_set); + case 'E': + return parse_presentation_type(pres::exp_upper, float_set); + case 'f': + return parse_presentation_type(pres::fixed_lower, float_set); + case 'F': + return parse_presentation_type(pres::fixed_upper, float_set); + case 'g': + return parse_presentation_type(pres::general_lower, float_set); + case 'G': + return parse_presentation_type(pres::general_upper, float_set); + case 'c': + return parse_presentation_type(pres::chr, integral_set); + case 's': + return parse_presentation_type(pres::string, + bool_set | string_set | cstring_set); + case 'p': + return parse_presentation_type(pres::pointer, pointer_set | cstring_set); + case '?': + return parse_presentation_type(pres::debug, + char_set | string_set | cstring_set); + case '}': + return begin; + default: { + if (*begin == '}') return begin; + // Parse fill and alignment. + auto fill_end = begin + code_point_length(begin); + if (end - fill_end <= 0) { + throw_format_error("invalid format specifier"); + return begin; + } + if (*begin == '{') { + throw_format_error("invalid fill character '{'"); + return begin; + } + auto align = parse_align(to_ascii(*fill_end)); + enter_state(state::align, align != align::none); + specs.fill = {begin, to_unsigned(fill_end - begin)}; + specs.align = align; + begin = fill_end + 1; + } + } + if (begin == end) return begin; + c = to_ascii(*begin); + } +} + +template +FMT_CONSTEXPR auto parse_replacement_field(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + struct id_adapter { + Handler& handler; + int arg_id; + + FMT_CONSTEXPR void on_auto() { arg_id = handler.on_arg_id(); } + FMT_CONSTEXPR void on_index(int id) { arg_id = handler.on_arg_id(id); } + FMT_CONSTEXPR void on_name(basic_string_view id) { + arg_id = handler.on_arg_id(id); + } + }; + + ++begin; + if (begin == end) return handler.on_error("invalid format string"), end; + if (*begin == '}') { + handler.on_replacement_field(handler.on_arg_id(), begin); + } else if (*begin == '{') { + handler.on_text(begin, begin + 1); + } else { + auto adapter = id_adapter{handler, 0}; + begin = parse_arg_id(begin, end, adapter); + Char c = begin != end ? 
*begin : Char(); + if (c == '}') { + handler.on_replacement_field(adapter.arg_id, begin); + } else if (c == ':') { + begin = handler.on_format_specs(adapter.arg_id, begin + 1, end); + if (begin == end || *begin != '}') + return handler.on_error("unknown format specifier"), end; + } else { + return handler.on_error("missing '}' in format string"), end; + } + } + return begin + 1; +} + +template +FMT_CONSTEXPR FMT_INLINE void parse_format_string( + basic_string_view format_str, Handler&& handler) { + auto begin = format_str.data(); + auto end = begin + format_str.size(); + if (end - begin < 32) { + // Use a simple loop instead of memchr for small strings. + const Char* p = begin; + while (p != end) { + auto c = *p++; + if (c == '{') { + handler.on_text(begin, p - 1); + begin = p = parse_replacement_field(p - 1, end, handler); + } else if (c == '}') { + if (p == end || *p != '}') + return handler.on_error("unmatched '}' in format string"); + handler.on_text(begin, p); + begin = ++p; + } + } + handler.on_text(begin, end); + return; + } + struct writer { + FMT_CONSTEXPR void operator()(const Char* from, const Char* to) { + if (from == to) return; + for (;;) { + const Char* p = nullptr; + if (!find(from, to, Char('}'), p)) + return handler_.on_text(from, to); + ++p; + if (p == to || *p != '}') + return handler_.on_error("unmatched '}' in format string"); + handler_.on_text(from, p); + from = p + 1; + } + } + Handler& handler_; + } write = {handler}; + while (begin != end) { + // Doing two passes with memchr (one for '{' and another for '}') is up to + // 2.5x faster than the naive one-pass implementation on big format strings. + const Char* p = begin; + if (*begin != '{' && !find(begin + 1, end, Char('{'), p)) + return write(begin, end); + write(begin, p); + begin = parse_replacement_field(p, end, handler); + } +} + +template ::value> struct strip_named_arg { + using type = T; +}; +template struct strip_named_arg { + using type = remove_cvref_t; +}; + +template +FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) + -> decltype(ctx.begin()) { + using char_type = typename ParseContext::char_type; + using context = buffer_context; + using mapped_type = conditional_t< + mapped_type_constant::value != type::custom_type, + decltype(arg_mapper().map(std::declval())), + typename strip_named_arg::type>; +#if defined(__cpp_if_constexpr) + if constexpr (std::is_default_constructible_v< + formatter>) { + return formatter().parse(ctx); + } else { + type_is_unformattable_for _; + return ctx.begin(); + } +#else + return formatter().parse(ctx); +#endif +} + +// Checks char specs and returns true iff the presentation type is char-like. +template +FMT_CONSTEXPR auto check_char_specs(const format_specs& specs) -> bool { + if (specs.type != presentation_type::none && + specs.type != presentation_type::chr && + specs.type != presentation_type::debug) { + return false; + } + if (specs.align == align::numeric || specs.sign != sign::none || specs.alt) + throw_format_error("invalid format specifier for char"); + return true; +} + +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +template +constexpr auto get_arg_index_by_name(basic_string_view name) -> int { + if constexpr (is_statically_named_arg()) { + if (name == T::name) return N; + } + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name(name); + (void)name; // Workaround an MSVC bug about "unused" parameter. 
+ return -1; +} +#endif + +template +FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view name) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name<0, Args...>(name); +#endif + (void)name; + return -1; +} + +template class format_string_checker { + private: + using parse_context_type = compile_parse_context; + static constexpr int num_args = sizeof...(Args); + + // Format specifier parsing function. + // In the future basic_format_parse_context will replace compile_parse_context + // here and will use is_constant_evaluated and downcasting to access the data + // needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1. + using parse_func = const Char* (*)(parse_context_type&); + + type types_[num_args > 0 ? static_cast(num_args) : 1]; + parse_context_type context_; + parse_func parse_funcs_[num_args > 0 ? static_cast(num_args) : 1]; + + public: + explicit FMT_CONSTEXPR format_string_checker(basic_string_view fmt) + : types_{mapped_type_constant>::value...}, + context_(fmt, num_args, types_), + parse_funcs_{&parse_format_specs...} {} + + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + + FMT_CONSTEXPR auto on_arg_id() -> int { return context_.next_arg_id(); } + FMT_CONSTEXPR auto on_arg_id(int id) -> int { + return context_.check_arg_id(id), id; + } + FMT_CONSTEXPR auto on_arg_id(basic_string_view id) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + auto index = get_arg_index_by_name(id); + if (index < 0) on_error("named argument is not found"); + return index; +#else + (void)id; + on_error("compile-time checks for named arguments require C++20 support"); + return 0; +#endif + } + + FMT_CONSTEXPR void on_replacement_field(int id, const Char* begin) { + on_format_specs(id, begin, begin); // Call parse() on empty specs. + } + + FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*) + -> const Char* { + context_.advance_to(begin); + // id >= 0 check is a workaround for gcc 10 bug (#2065). + return id >= 0 && id < num_args ? parse_funcs_[id](context_) : begin; + } + + FMT_CONSTEXPR void on_error(const char* message) { + throw_format_error(message); + } +}; + +// Reports a compile-time error if S is not a valid format string. +template ::value)> +FMT_INLINE void check_format_string(const S&) { +#ifdef FMT_ENFORCE_COMPILE_STRING + static_assert(is_compile_string::value, + "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " + "FMT_STRING."); +#endif +} +template ::value)> +void check_format_string(S format_str) { + using char_t = typename S::char_type; + FMT_CONSTEXPR auto s = basic_string_view(format_str); + using checker = format_string_checker...>; + FMT_CONSTEXPR bool error = (parse_format_string(s, checker(s)), true); + ignore_unused(error); +} + +template struct vformat_args { + using type = basic_format_args< + basic_format_context>, Char>>; +}; +template <> struct vformat_args { using type = format_args; }; + +// Use vformat_args and avoid type_identity to keep symbols short. +template +void vformat_to(buffer& buf, basic_string_view fmt, + typename vformat_args::type args, locale_ref loc = {}); + +FMT_API void vprint_mojibake(std::FILE*, string_view, format_args); +#ifndef _WIN32 +inline void vprint_mojibake(std::FILE*, string_view, format_args) {} +#endif +} // namespace detail + +FMT_BEGIN_EXPORT + +// A formatter specialization for natively supported types. 
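+// User-defined types are formatted by providing their own specialization; a
+// minimal sketch (illustrative, not part of this header) reusing the
+// string_view formatter:
+//
+//   struct point { double x, y; };
+//   template <> struct fmt::formatter<point> : fmt::formatter<string_view> {
+//     auto format(point p, format_context& ctx) const {
+//       return fmt::formatter<string_view>::format(
+//           fmt::format("({}, {})", p.x, p.y), ctx);
+//     }
+//   };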
+template +struct formatter::value != + detail::type::custom_type>> { + private: + detail::dynamic_format_specs specs_; + + public: + template + FMT_CONSTEXPR auto parse(ParseContext& ctx) -> const Char* { + auto type = detail::type_constant::value; + auto end = + detail::parse_format_specs(ctx.begin(), ctx.end(), specs_, ctx, type); + if (type == detail::type::char_type) detail::check_char_specs(specs_); + return end; + } + + template ::value, + FMT_ENABLE_IF(U == detail::type::string_type || + U == detail::type::cstring_type || + U == detail::type::char_type)> + FMT_CONSTEXPR void set_debug_format(bool set = true) { + specs_.type = set ? presentation_type::debug : presentation_type::none; + } + + template + FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const + -> decltype(ctx.out()); +}; + +template struct runtime_format_string { + basic_string_view str; +}; + +/** A compile-time format string. */ +template class basic_format_string { + private: + basic_string_view str_; + + public: + template >::value)> + FMT_CONSTEVAL FMT_INLINE basic_format_string(const S& s) : str_(s) { + static_assert( + detail::count< + (std::is_base_of>::value && + std::is_reference::value)...>() == 0, + "passing views as lvalues is disallowed"); +#ifdef FMT_HAS_CONSTEVAL + if constexpr (detail::count_named_args() == + detail::count_statically_named_args()) { + using checker = + detail::format_string_checker...>; + detail::parse_format_string(str_, checker(s)); + } +#else + detail::check_format_string(s); +#endif + } + basic_format_string(runtime_format_string fmt) : str_(fmt.str) {} + + FMT_INLINE operator basic_string_view() const { return str_; } + FMT_INLINE auto get() const -> basic_string_view { return str_; } +}; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +// Workaround broken conversion on older gcc. +template using format_string = string_view; +inline auto runtime(string_view s) -> string_view { return s; } +#else +template +using format_string = basic_format_string...>; +/** + \rst + Creates a runtime format string. + + **Example**:: + + // Check format string at runtime instead of compile-time. + fmt::print(fmt::runtime("{:d}"), "I am not a number"); + \endrst + */ +inline auto runtime(string_view s) -> runtime_format_string<> { return {{s}}; } +#endif + +FMT_API auto vformat(string_view fmt, format_args args) -> std::string; + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and returns the result + as a string. + + **Example**:: + + #include + std::string message = fmt::format("The answer is {}.", 42); + \endrst +*/ +template +FMT_NODISCARD FMT_INLINE auto format(format_string fmt, T&&... args) + -> std::string { + return vformat(fmt, fmt::make_format_args(args...)); +} + +/** Formats a string and writes the output to ``out``. */ +template ::value)> +auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { + auto&& buf = detail::get_buffer(out); + detail::vformat_to(buf, fmt, args, {}); + return detail::get_iterator(buf, out); +} + +/** + \rst + Formats ``args`` according to specifications in ``fmt``, writes the result to + the output iterator ``out`` and returns the iterator past the end of the output + range. `format_to` does not append a terminating null character. + + **Example**:: + + auto out = std::vector(); + fmt::format_to(std::back_inserter(out), "{}", 42); + \endrst + */ +template ::value)> +FMT_INLINE auto format_to(OutputIt out, format_string fmt, T&&... 
args) + -> OutputIt { + return vformat_to(out, fmt, fmt::make_format_args(args...)); +} + +template struct format_to_n_result { + /** Iterator past the end of the output range. */ + OutputIt out; + /** Total (not truncated) output size. */ + size_t size; +}; + +template ::value)> +auto vformat_to_n(OutputIt out, size_t n, string_view fmt, format_args args) + -> format_to_n_result { + using traits = detail::fixed_buffer_traits; + auto buf = detail::iterator_buffer(out, n); + detail::vformat_to(buf, fmt, args, {}); + return {buf.out(), buf.count()}; +} + +/** + \rst + Formats ``args`` according to specifications in ``fmt``, writes up to ``n`` + characters of the result to the output iterator ``out`` and returns the total + (not truncated) output size and the iterator past the end of the output range. + `format_to_n` does not append a terminating null character. + \endrst + */ +template ::value)> +FMT_INLINE auto format_to_n(OutputIt out, size_t n, format_string fmt, + T&&... args) -> format_to_n_result { + return vformat_to_n(out, n, fmt, fmt::make_format_args(args...)); +} + +/** Returns the number of chars in the output of ``format(fmt, args...)``. */ +template +FMT_NODISCARD FMT_INLINE auto formatted_size(format_string fmt, + T&&... args) -> size_t { + auto buf = detail::counting_buffer<>(); + detail::vformat_to(buf, fmt, fmt::make_format_args(args...), {}); + return buf.count(); +} + +FMT_API void vprint(string_view fmt, format_args args); +FMT_API void vprint(std::FILE* f, string_view fmt, format_args args); + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and writes the output + to ``stdout``. + + **Example**:: + + fmt::print("Elapsed time: {0:.2f} seconds", 1.23); + \endrst + */ +template +FMT_INLINE void print(format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::is_utf8() ? vprint(fmt, vargs) + : detail::vprint_mojibake(stdout, fmt, vargs); +} + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and writes the + output to the file ``f``. + + **Example**:: + + fmt::print(stderr, "Don't {}!", "panic"); + \endrst + */ +template +FMT_INLINE void print(std::FILE* f, format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::is_utf8() ? vprint(f, fmt, vargs) + : detail::vprint_mojibake(f, fmt, vargs); +} + +/** + Formats ``args`` according to specifications in ``fmt`` and writes the + output to the file ``f`` followed by a newline. + */ +template +FMT_INLINE void println(std::FILE* f, format_string fmt, T&&... args) { + return fmt::print(f, "{}\n", fmt::format(fmt, std::forward(args)...)); +} + +/** + Formats ``args`` according to specifications in ``fmt`` and writes the output + to ``stdout`` followed by a newline. + */ +template +FMT_INLINE void println(format_string fmt, T&&... args) { + return fmt::println(stdout, fmt, std::forward(args)...); +} + +FMT_END_EXPORT +FMT_GCC_PRAGMA("GCC pop_options") +FMT_END_NAMESPACE + +#ifdef FMT_HEADER_ONLY +# include "format.h" +#endif +#endif // FMT_CORE_H_ diff --git a/Genie/Genie/src/qualla/include/fmt/format-inl.h b/Genie/Genie/src/qualla/include/fmt/format-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..dac2d437a41ab7b0b4e72895212b5a972ada73a9 --- /dev/null +++ b/Genie/Genie/src/qualla/include/fmt/format-inl.h @@ -0,0 +1,1662 @@ +// Formatting library for C++ - implementation +// +// Copyright (c) 2012 - 2016, Victor Zverovich +// All rights reserved. 
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_FORMAT_INL_H_
+#define FMT_FORMAT_INL_H_
+
+#include <algorithm>
+#include <cerrno>  // errno
+#include <climits>
+#include <cmath>
+#include <exception>
+
+#ifndef FMT_STATIC_THOUSANDS_SEPARATOR
+#  include <locale>
+#endif
+
+#ifdef _WIN32
+#  include <io.h>  // _isatty
+#endif
+
+#include "format.h"
+
+FMT_BEGIN_NAMESPACE
+namespace detail {
+
+FMT_FUNC void assert_fail(const char* file, int line, const char* message) {
+  // Use unchecked std::fprintf to avoid triggering another assertion when
+  // writing to stderr fails
+  std::fprintf(stderr, "%s:%d: assertion failed: %s", file, line, message);
+  // Chosen instead of std::abort to satisfy Clang in CUDA mode during device
+  // code pass.
+  std::terminate();
+}
+
+FMT_FUNC void throw_format_error(const char* message) {
+  FMT_THROW(format_error(message));
+}
+
+FMT_FUNC void format_error_code(detail::buffer<char>& out, int error_code,
+                                string_view message) noexcept {
+  // Report error code making sure that the output fits into
+  // inline_buffer_size to avoid dynamic memory allocation and potential
+  // bad_alloc.
+  out.try_resize(0);
+  static const char SEP[] = ": ";
+  static const char ERROR_STR[] = "error ";
+  // Subtract 2 to account for terminating null characters in SEP and ERROR_STR.
+  size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2;
+  auto abs_value = static_cast<uint32_or_64_or_128_t<int>>(error_code);
+  if (detail::is_negative(error_code)) {
+    abs_value = 0 - abs_value;
+    ++error_code_size;
+  }
+  error_code_size += detail::to_unsigned(detail::count_digits(abs_value));
+  auto it = buffer_appender<char>(out);
+  if (message.size() <= inline_buffer_size - error_code_size)
+    format_to(it, FMT_STRING("{}{}"), message, SEP);
+  format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code);
+  FMT_ASSERT(out.size() <= inline_buffer_size, "");
+}
+
+FMT_FUNC void report_error(format_func func, int error_code,
+                           const char* message) noexcept {
+  memory_buffer full_message;
+  func(full_message, error_code, message);
+  // Don't use fwrite_fully because the latter may throw.
+  if (std::fwrite(full_message.data(), full_message.size(), 1, stderr) > 0)
+    std::fputc('\n', stderr);
+}
+
+// A wrapper around fwrite that throws on error.
+inline void fwrite_fully(const void* ptr, size_t size, size_t count,
+                         FILE* stream) {
+  size_t written = std::fwrite(ptr, size, count, stream);
+  if (written < count)
+    FMT_THROW(system_error(errno, FMT_STRING("cannot write to file")));
+}
+
+#ifndef FMT_STATIC_THOUSANDS_SEPARATOR
+template <typename Locale>
+locale_ref::locale_ref(const Locale& loc) : locale_(&loc) {
+  static_assert(std::is_same<Locale, std::locale>::value, "");
+}
+
+template <typename Locale> Locale locale_ref::get() const {
+  static_assert(std::is_same<Locale, std::locale>::value, "");
+  return locale_ ? *static_cast<const std::locale*>(locale_) : std::locale();
+}
+
+template <typename Char>
+FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result<Char> {
+  auto& facet = std::use_facet<std::numpunct<Char>>(loc.get<std::locale>());
+  auto grouping = facet.grouping();
+  auto thousands_sep = grouping.empty() ?
Char() : facet.thousands_sep(); + return {std::move(grouping), thousands_sep}; +} +template FMT_FUNC Char decimal_point_impl(locale_ref loc) { + return std::use_facet>(loc.get()) + .decimal_point(); +} +#else +template +FMT_FUNC auto thousands_sep_impl(locale_ref) -> thousands_sep_result { + return {"\03", FMT_STATIC_THOUSANDS_SEPARATOR}; +} +template FMT_FUNC Char decimal_point_impl(locale_ref) { + return '.'; +} +#endif + +FMT_FUNC auto write_loc(appender out, loc_value value, + const format_specs<>& specs, locale_ref loc) -> bool { +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR + auto locale = loc.get(); + // We cannot use the num_put facet because it may produce output in + // a wrong encoding. + using facet = format_facet; + if (std::has_facet(locale)) + return std::use_facet(locale).put(out, value, specs); + return facet(locale).put(out, value, specs); +#endif + return false; +} +} // namespace detail + +template typename Locale::id format_facet::id; + +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR +template format_facet::format_facet(Locale& loc) { + auto& numpunct = std::use_facet>(loc); + grouping_ = numpunct.grouping(); + if (!grouping_.empty()) separator_ = std::string(1, numpunct.thousands_sep()); +} + +template <> +FMT_API FMT_FUNC auto format_facet::do_put( + appender out, loc_value val, const format_specs<>& specs) const -> bool { + return val.visit( + detail::loc_writer<>{out, specs, separator_, grouping_, decimal_point_}); +} +#endif + +FMT_FUNC std::system_error vsystem_error(int error_code, string_view fmt, + format_args args) { + auto ec = std::error_code(error_code, std::generic_category()); + return std::system_error(ec, vformat(fmt, args)); +} + +namespace detail { + +template inline bool operator==(basic_fp x, basic_fp y) { + return x.f == y.f && x.e == y.e; +} + +// Compilers should be able to optimize this into the ror instruction. +FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept { + r &= 31; + return (n >> r) | (n << (32 - r)); +} +FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { + r &= 63; + return (n >> r) | (n << (64 - r)); +} + +// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. +namespace dragonbox { +// Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. +inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept { + return umul128_upper64(static_cast(x) << 32, y); +} + +// Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +inline uint128_fallback umul192_lower128(uint64_t x, + uint128_fallback y) noexcept { + uint64_t high = x * y.high(); + uint128_fallback high_low = umul128(x, y.low()); + return {high + high_low.high(), high_low.low()}; +} + +// Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. +inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept { + return x * y; +} + +// Various fast log computations. +inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { + FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent"); + return (e * 631305 - 261663) >> 21; +} + +FMT_INLINE_VARIABLE constexpr struct { + uint32_t divisor; + int shift_amount; +} div_small_pow10_infos[] = {{10, 16}, {100, 16}}; + +// Replaces n by floor(n / pow(10, N)) returning true if and only if n is +// divisible by pow(10, N). +// Precondition: n <= pow(10, N + 1). 
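+// Worked example (illustrative) for N = 1: divisor = 10, shift_amount = 16,
+// so magic_number = 2^16 / 10 + 1 = 6554. For n = 70: 70 * 6554 = 458780,
+// 458780 & 0xffff = 28 < 6554, so 70 is divisible by 10, and
+// 458780 >> 16 = 7 = 70 / 10.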
+template +bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { + // The numbers below are chosen such that: + // 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100, + // 2. nm mod 2^k < m if and only if n is divisible by d, + // where m is magic_number, k is shift_amount + // and d is divisor. + // + // Item 1 is a common technique of replacing division by a constant with + // multiplication, see e.g. "Division by Invariant Integers Using + // Multiplication" by Granlund and Montgomery (1994). magic_number (m) is set + // to ceil(2^k/d) for large enough k. + // The idea for item 2 originates from Schubfach. + constexpr auto info = div_small_pow10_infos[N - 1]; + FMT_ASSERT(n <= info.divisor * 10, "n is too large"); + constexpr uint32_t magic_number = + (1u << info.shift_amount) / info.divisor + 1; + n *= magic_number; + const uint32_t comparison_mask = (1u << info.shift_amount) - 1; + bool result = (n & comparison_mask) < magic_number; + n >>= info.shift_amount; + return result; +} + +// Computes floor(n / pow(10, N)) for small n and N. +// Precondition: n <= pow(10, N + 1). +template uint32_t small_division_by_pow10(uint32_t n) noexcept { + constexpr auto info = div_small_pow10_infos[N - 1]; + FMT_ASSERT(n <= info.divisor * 10, "n is too large"); + constexpr uint32_t magic_number = + (1u << info.shift_amount) / info.divisor + 1; + return (n * magic_number) >> info.shift_amount; +} + +// Computes floor(n / 10^(kappa + 1)) (float) +inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept { + // 1374389535 = ceil(2^37/100) + return static_cast((static_cast(n) * 1374389535) >> 37); +} +// Computes floor(n / 10^(kappa + 1)) (double) +inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept { + // 2361183241434822607 = ceil(2^(64+7)/1000) + return umul128_upper64(n, 2361183241434822607ull) >> 7; +} + +// Various subroutines using pow10 cache +template struct cache_accessor; + +template <> struct cache_accessor { + using carrier_uint = float_info::carrier_uint; + using cache_entry_type = uint64_t; + + static uint64_t get_cached_power(int k) noexcept { + FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, + "k is out of range"); + static constexpr const uint64_t pow10_significands[] = { + 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, + 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, + 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, + 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, + 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, + 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810, + 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, + 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd, + 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, + 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, + 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000, + 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, + 0xc350000000000000, 0xf424000000000000, 0x9896800000000000, + 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, + 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000, + 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000, + 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, + 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000, + 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, + 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940985, + 0xa18f07d736b90be6, 0xc9f2c9cd04674edf, 
0xfc6f7c4045812297, + 0x9dc5ada82b70b59e, 0xc5371912364ce306, 0xf684df56c3e01bc7, + 0x9a130b963a6c115d, 0xc097ce7bc90715b4, 0xf0bdc21abb48db21, + 0x96769950b50d88f5, 0xbc143fa4e250eb32, 0xeb194f8e1ae525fe, + 0x92efd1b8d0cf37bf, 0xb7abc627050305ae, 0xe596b7b0c643c71a, + 0x8f7e32ce7bea5c70, 0xb35dbf821ae4f38c, 0xe0352f62a19e306f}; + return pow10_significands[k - float_info::min_k]; + } + + struct compute_mul_result { + carrier_uint result; + bool is_integer; + }; + struct compute_mul_parity_result { + bool parity; + bool is_integer; + }; + + static compute_mul_result compute_mul( + carrier_uint u, const cache_entry_type& cache) noexcept { + auto r = umul96_upper64(u, cache); + return {static_cast(r >> 32), + static_cast(r) == 0}; + } + + static uint32_t compute_delta(const cache_entry_type& cache, + int beta) noexcept { + return static_cast(cache >> (64 - 1 - beta)); + } + + static compute_mul_parity_result compute_mul_parity( + carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + FMT_ASSERT(beta >= 1, ""); + FMT_ASSERT(beta < 64, ""); + + auto r = umul96_lower64(two_f, cache); + return {((r >> (64 - beta)) & 1) != 0, + static_cast(r >> (32 - beta)) == 0}; + } + + static carrier_uint compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return static_cast( + (cache - (cache >> (num_significand_bits() + 2))) >> + (64 - num_significand_bits() - 1 - beta)); + } + + static carrier_uint compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return static_cast( + (cache + (cache >> (num_significand_bits() + 1))) >> + (64 - num_significand_bits() - 1 - beta)); + } + + static carrier_uint compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return (static_cast( + cache >> (64 - num_significand_bits() - 2 - beta)) + + 1) / + 2; + } +}; + +template <> struct cache_accessor { + using carrier_uint = float_info::carrier_uint; + using cache_entry_type = uint128_fallback; + + static uint128_fallback get_cached_power(int k) noexcept { + FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, + "k is out of range"); + + static constexpr const uint128_fallback pow10_significands[] = { +#if FMT_USE_FULL_CACHE_DRAGONBOX + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0x9faacf3df73609b1, 0x77b191618c54e9ad}, + {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, + {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, + {0x9becce62836ac577, 0x4ee367f9430aec33}, + {0xc2e801fb244576d5, 0x229c41f793cda740}, + {0xf3a20279ed56d48a, 0x6b43527578c11110}, + {0x9845418c345644d6, 0x830a13896b78aaaa}, + {0xbe5691ef416bd60c, 0x23cc986bc656d554}, + {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, + {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, + {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, + {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, + {0x91376c36d99995be, 0x23100809b9c21fa2}, + {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, + {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, + {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, + {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, + {0xdd95317f31c7fa1d, 0x40405643d711d584}, + {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, + {0xad1c8eab5ee43b66, 0xda3243650005eed0}, + {0xd863b256369d4a40, 0x90bed43e40076a83}, + {0x873e4f75e2224e68, 0x5a7744a6e804a292}, + {0xa90de3535aaae202, 0x711515d0a205cb37}, + {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, + {0x8412d9991ed58091, 0xe858790afe9486c3}, + {0xa5178fff668ae0b6, 0x626e974dbe39a873}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + 
{0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, + {0xa139029f6a239f72, 0x1c1fffc1ebc44e81}, + {0xc987434744ac874e, 0xa327ffb266b56221}, + {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, + {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, + {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, + {0xf6019da07f549b2b, 0x7e2a53a146606a49}, + {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, + {0xc0314325637a1939, 0xfa911155fefb5309}, + {0xf03d93eebc589f88, 0x793555ab7eba27cb}, + {0x96267c7535b763b5, 0x4bc1558b2f3458df}, + {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, + {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, + {0x92a1958a7675175f, 0x0bfacd89ec191eca}, + {0xb749faed14125d36, 0xcef980ec671f667c}, + {0xe51c79a85916f484, 0x82b7e12780e7401b}, + {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, + {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, + {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, + {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1}, + {0xaecc49914078536d, 0x58fae9f773886e19}, + {0xda7f5bf590966848, 0xaf39a475506a899f}, + {0x888f99797a5e012d, 0x6d8406c952429604}, + {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, + {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, + {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0xd0601d8efc57b08b, 0xf13b94daf124da27}, + {0x823c12795db6ce57, 0x76c53d08d6b70859}, + {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, + {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, + {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, + {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, + {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, + {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, + {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, + {0xc21094364dfb5636, 0x985915fc12f542e5}, + {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, + {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, + {0xbd8430bd08277231, 0x50c6ff782a838354}, + {0xece53cec4a314ebd, 0xa4f8bf5635246429}, + {0x940f4613ae5ed136, 0x871b7795e136be9a}, + {0xb913179899f68584, 0x28e2557b59846e40}, + {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, + {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, + {0xb4bca50b065abe63, 0x0fed077a756b53aa}, + {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, + {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, + {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, + {0xdca04777f541c567, 0xecf0d7a0fc5583a1}, + {0x89e42caaf9491b60, 0xf41686c49db57245}, + {0xac5d37d5b79b6239, 0x311c2875c522ced6}, + {0xd77485cb25823ac7, 0x7d633293366b828c}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, + {0xd267caa862a12d66, 0xd072df63c324fd7c}, + {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, + {0xa46116538d0deb78, 0x52d9be85f074e609}, + {0xcd795be870516656, 0x67902e276c921f8c}, + {0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, + {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, + {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, + {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, + {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, + {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, + {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, + {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, + {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, + {0xef340a98172aace4, 0x86fb897116c87c35}, + {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, + {0xbae0a846d2195712, 0x8974836059cca10a}, + {0xe998d258869facd7, 0x2bd1a438703fc94c}, + {0x91ff83775423cc06, 0x7b6306a34627ddd0}, + {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, + {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, + {0x8e938662882af53e, 0x547eb47b7282ee9d}, + {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, + {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, + {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, + {0xae0b158b4738705e, 0x9624ab50b148d446}, + {0xd98ddaee19068c76, 
0x3badd624dd9b0958}, + {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, + {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, + {0xd47487cc8470652b, 0x7647c32000696720}, + {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, + {0xa5fb0a17c777cf09, 0xf468107100525891}, + {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, + {0x81ac1fe293d599bf, 0xc6f14cd848405531}, + {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, + {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, + {0xfd442e4688bd304a, 0x908f4a166d1da664}, + {0x9e4a9cec15763e2e, 0x9a598e4e043287ff}, + {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, + {0xf7549530e188c128, 0xd12bee59e68ef47d}, + {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, + {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, + {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, + {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, + {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, + {0xebdf661791d60f56, 0x111b495b3464ad22}, + {0x936b9fcebb25c995, 0xcab10dd900beec35}, + {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, + {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, + {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, + {0xb3f4e093db73a093, 0x59ed216765690f57}, + {0xe0f218b8d25088b8, 0x306869c13ec3532d}, + {0x8c974f7383725573, 0x1e414218c73a13fc}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, + {0x894bc396ce5da772, 0x6b8bba8c328eb784}, + {0xab9eb47c81f5114f, 0x066ea92f3f326565}, + {0xd686619ba27255a2, 0xc80a537b0efefebe}, + {0x8613fd0145877585, 0xbd06742ce95f5f37}, + {0xa798fc4196e952e7, 0x2c48113823b73705}, + {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, + {0x82ef85133de648c4, 0x9a984d73dbe722fc}, + {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, + {0xcc963fee10b7d1b3, 0x318df905079926a9}, + {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, + {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, + {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, + {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, + {0x9c1661a651213e2d, 0x06bea10ca65c084f}, + {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, + {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, + {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, + {0xbe89523386091465, 0xf6bbb397f1135824}, + {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, + {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, + {0xba121a4650e4ddeb, 0x92f34d62616ce414}, + {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, + {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, + {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, + {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, + {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, + {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, + {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, + {0xd89d64d57a607744, 0xe871c7bf077ba8b8}, + {0x87625f056c7c4a8b, 0x11471cd764ad4973}, + {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, + {0xd389b47879823479, 0x4aff1d108d4ec2c4}, + {0x843610cb4bf160cb, 0xcedf722a585139bb}, + {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, + {0xce947a3da6a9273e, 0x733d226229feea33}, + {0x811ccc668829b887, 0x0806357d5a3f5260}, + {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, + {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, + {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, + {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, + {0xc5029163f384a931, 0x0a9e795e65d4df12}, + {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, + {0x99ea0196163fa42e, 0x504bced1bf8e4e46}, + {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, + {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, + {0x964e858c91ba2655, 0x3a6a07f8d510f870}, + {0xbbe226efb628afea, 0x890489f70a55368c}, + {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, + {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, + {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, + 
{0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, + {0xb32df8e9f3546564, 0x47939822dc96abfa}, + {0xdff9772470297ebd, 0x59787e2b93bc56f8}, + {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, + {0xaefae51477a06b03, 0xede622920b6b23f2}, + {0xdab99e59958885c4, 0xe95fab368e45ecee}, + {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, + {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, + {0xd59944a37c0752a2, 0x4be76d3346f04960}, + {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, + {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, + {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, + {0x825ecc24c873782f, 0x8ed400668c0c28c9}, + {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, + {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, + {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, + {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, + {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, + {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, + {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb}, + {0xc24452da229b021b, 0xfbe85badce996169}, + {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, + {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, + {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, + {0xed246723473e3813, 0x290123e9aab23b69}, + {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, + {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, + {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, + {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, + {0x8d590723948a535f, 0x579c487e5a38ad0f}, + {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, + {0xdcdb1b2798182244, 0xf8e431456cf88e66}, + {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, + {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, + {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, + {0x86ccbb52ea94baea, 0x98e947129fc2b4ea}, + {0xa87fea27a539e9a5, 0x3f2398d747b36225}, + {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, + {0x83a3eeeef9153e89, 0x1953cf68300424ad}, + {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, + {0xcdb02555653131b6, 0x3792f412cb06794e}, + {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, + {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, + {0xc8de047564d20a8b, 0xf245825a5a445276}, + {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, + {0x9ced737bb6c4183d, 0x55464dd69685606c}, + {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, + {0xf53304714d9265df, 0xd53dd99f4b3066a9}, + {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, + {0xbf8fdb78849a5f96, 0xde98520472bdd034}, + {0xef73d256a5c0f77c, 0x963e66858f6d4441}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xbb127c53b17ec159, 0x5560c018580d5d53}, + {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, + {0x9226712162ab070d, 0xcab3961304ca70e9}, + {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, + {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, + {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, + {0xb267ed1940f1c61c, 0x55f038b237591ed4}, + {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, + {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, + {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, + {0xd9c7dced53c72255, 0x96e7bd358c904a22}, + {0x881cea14545c7575, 0x7e50d64177da2e55}, + {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, + {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865}, + {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, + {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, + {0xcfb11ead453994ba, 0x67de18eda5814af3}, + {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, + {0xa2425ff75e14fc31, 0xa1258379a94d028e}, + {0xcad2f7f5359a3b3e, 0x096ee45813a04331}, + {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, + {0x9e74d1b791e07e48, 0x775ea264cf55347e}, + {0xc612062576589dda, 0x95364afe032a819e}, + {0xf79687aed3eec551, 0x3a83ddbd83f52205}, + {0x9abe14cd44753b52, 0xc4926a9672793543}, + {0xc16d9a0095928a27, 
0x75b7053c0f178294}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, + {0xbce5086492111aea, 0x88f4bb1ca6bcf585}, + {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, + {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, + {0xb877aa3236a4b449, 0x09befeb9fad487c3}, + {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, + {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, + {0xb424dc35095cd80f, 0x538484c19ef38c95}, + {0xe12e13424bb40e13, 0x2865a5f206b06fba}, + {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, + {0xafebff0bcb24aafe, 0xf78f69a51539d749}, + {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, + {0x89705f4136b4a597, 0x31680a88f8953031}, + {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, + {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, + {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, + {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, + {0xd1b71758e219652b, 0xd3c36113404ea4a9}, + {0x83126e978d4fdf3b, 0x645a1cac083126ea}, + {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, + {0xcccccccccccccccc, 0xcccccccccccccccd}, + {0x8000000000000000, 0x0000000000000000}, + {0xa000000000000000, 0x0000000000000000}, + {0xc800000000000000, 0x0000000000000000}, + {0xfa00000000000000, 0x0000000000000000}, + {0x9c40000000000000, 0x0000000000000000}, + {0xc350000000000000, 0x0000000000000000}, + {0xf424000000000000, 0x0000000000000000}, + {0x9896800000000000, 0x0000000000000000}, + {0xbebc200000000000, 0x0000000000000000}, + {0xee6b280000000000, 0x0000000000000000}, + {0x9502f90000000000, 0x0000000000000000}, + {0xba43b74000000000, 0x0000000000000000}, + {0xe8d4a51000000000, 0x0000000000000000}, + {0x9184e72a00000000, 0x0000000000000000}, + {0xb5e620f480000000, 0x0000000000000000}, + {0xe35fa931a0000000, 0x0000000000000000}, + {0x8e1bc9bf04000000, 0x0000000000000000}, + {0xb1a2bc2ec5000000, 0x0000000000000000}, + {0xde0b6b3a76400000, 0x0000000000000000}, + {0x8ac7230489e80000, 0x0000000000000000}, + {0xad78ebc5ac620000, 0x0000000000000000}, + {0xd8d726b7177a8000, 0x0000000000000000}, + {0x878678326eac9000, 0x0000000000000000}, + {0xa968163f0a57b400, 0x0000000000000000}, + {0xd3c21bcecceda100, 0x0000000000000000}, + {0x84595161401484a0, 0x0000000000000000}, + {0xa56fa5b99019a5c8, 0x0000000000000000}, + {0xcecb8f27f4200f3a, 0x0000000000000000}, + {0x813f3978f8940984, 0x4000000000000000}, + {0xa18f07d736b90be5, 0x5000000000000000}, + {0xc9f2c9cd04674ede, 0xa400000000000000}, + {0xfc6f7c4045812296, 0x4d00000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xc5371912364ce305, 0x6c28000000000000}, + {0xf684df56c3e01bc6, 0xc732000000000000}, + {0x9a130b963a6c115c, 0x3c7f400000000000}, + {0xc097ce7bc90715b3, 0x4b9f100000000000}, + {0xf0bdc21abb48db20, 0x1e86d40000000000}, + {0x96769950b50d88f4, 0x1314448000000000}, + {0xbc143fa4e250eb31, 0x17d955a000000000}, + {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, + {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, + {0xb7abc627050305ad, 0xf14a3d9e40000000}, + {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, + {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, + {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, + {0xe0352f62a19e306e, 0xd50b2037ad200000}, + {0x8c213d9da502de45, 0x4526f422cc340000}, + {0xaf298d050e4395d6, 0x9670b12b7f410000}, + {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, + {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, + {0xab0e93b6efee0053, 0x8eea0d047a457a00}, + {0xd5d238a4abe98068, 0x72a4904598d6d880}, + {0x85a36366eb71f041, 0x47a6da2b7f864750}, + {0xa70c3c40a64e6c51, 0x999090b65f67d924}, + {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d}, + {0x82818f1281ed449f, 0xbff8f10e7a8921a5}, + {0xa321f2d7226895c7, 0xaff72d52192b6a0e}, + 
{0xcbea6f8ceb02bb39, 0x9bf4f8a69f764491}, + {0xfee50b7025c36a08, 0x02f236d04753d5b5}, + {0x9f4f2726179a2245, 0x01d762422c946591}, + {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef6}, + {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb3}, + {0x9b934c3b330c8577, 0x63cc55f49f88eb30}, + {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fc}, + {0xf316271c7fc3908a, 0x8bef464e3945ef7b}, + {0x97edd871cfda3a56, 0x97758bf0e3cbb5ad}, + {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea318}, + {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bde}, + {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6b}, + {0xb975d6b6ee39e436, 0xb3e2fd538e122b45}, + {0xe7d34c64a9c85d44, 0x60dbbca87196b617}, + {0x90e40fbeea1d3a4a, 0xbc8955e946fe31ce}, + {0xb51d13aea4a488dd, 0x6babab6398bdbe42}, + {0xe264589a4dcdab14, 0xc696963c7eed2dd2}, + {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca3}, + {0xb0de65388cc8ada8, 0x3b25a55f43294bcc}, + {0xdd15fe86affad912, 0x49ef0eb713f39ebf}, + {0x8a2dbf142dfcc7ab, 0x6e3569326c784338}, + {0xacb92ed9397bf996, 0x49c2c37f07965405}, + {0xd7e77a8f87daf7fb, 0xdc33745ec97be907}, + {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a4}, + {0xa8acd7c0222311bc, 0xc40832ea0d68ce0d}, + {0xd2d80db02aabd62b, 0xf50a3fa490c30191}, + {0x83c7088e1aab65db, 0x792667c6da79e0fb}, + {0xa4b8cab1a1563f52, 0x577001b891185939}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f87}, + {0x80b05e5ac60b6178, 0x544f8158315b05b5}, + {0xa0dc75f1778e39d6, 0x696361ae3db1c722}, + {0xc913936dd571c84c, 0x03bc3a19cd1e38ea}, + {0xfb5878494ace3a5f, 0x04ab48a04065c724}, + {0x9d174b2dcec0e47b, 0x62eb0d64283f9c77}, + {0xc45d1df942711d9a, 0x3ba5d0bd324f8395}, + {0xf5746577930d6500, 0xca8f44ec7ee3647a}, + {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecc}, + {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67f}, + {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101f}, + {0x95d04aee3b80ece5, 0xbba1f1d158724a13}, + {0xbb445da9ca61281f, 0x2a8a6e45ae8edc98}, + {0xea1575143cf97226, 0xf52d09d71a3293be}, + {0x924d692ca61be758, 0x593c2626705f9c57}, + {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836d}, + {0xe498f455c38b997a, 0x0b6dfb9c0f956448}, + {0x8edf98b59a373fec, 0x4724bd4189bd5ead}, + {0xb2977ee300c50fe7, 0x58edec91ec2cb658}, + {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ee}, + {0x8b865b215899f46c, 0xbd79e0d20082ee75}, + {0xae67f1e9aec07187, 0xecd8590680a3aa12}, + {0xda01ee641a708de9, 0xe80e6f4820cc9496}, + {0x884134fe908658b2, 0x3109058d147fdcde}, + {0xaa51823e34a7eede, 0xbd4b46f0599fd416}, + {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91b}, + {0x850fadc09923329e, 0x03e2cf6bc604ddb1}, + {0xa6539930bf6bff45, 0x84db8346b786151d}, + {0xcfe87f7cef46ff16, 0xe612641865679a64}, + {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07f}, + {0xa26da3999aef7749, 0xe3be5e330f38f09e}, + {0xcb090c8001ab551c, 0x5cadf5bfd3072cc6}, + {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f7}, + {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afb}, + {0xc646d63501a1511d, 0xb281e1fd541501b9}, + {0xf7d88bc24209a565, 0x1f225a7ca91a4227}, + {0x9ae757596946075f, 0x3375788de9b06959}, + {0xc1a12d2fc3978937, 0x0052d6b1641c83af}, + {0xf209787bb47d6b84, 0xc0678c5dbd23a49b}, + {0x9745eb4d50ce6332, 0xf840b7ba963646e1}, + {0xbd176620a501fbff, 0xb650e5a93bc3d899}, + {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebf}, + {0x93ba47c980e98cdf, 0xc66f336c36b10138}, + {0xb8a8d9bbe123f017, 0xb80b0047445d4185}, + {0xe6d3102ad96cec1d, 0xa60dc059157491e6}, + {0x9043ea1ac7e41392, 0x87c89837ad68db30}, + {0xb454e4a179dd1877, 0x29babe4598c311fc}, + {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67b}, + {0x8ce2529e2734bb1d, 0x1899e4a65f58660d}, + {0xb01ae745b101e9e4, 0x5ec05dcff72e7f90}, + {0xdc21a1171d42645d, 0x76707543f4fa1f74}, + {0x899504ae72497eba, 0x6a06494a791c53a9}, + {0xabfa45da0edbde69, 
0x0487db9d17636893}, + {0xd6f8d7509292d603, 0x45a9d2845d3c42b7}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b3}, + {0xa7f26836f282b732, 0x8e6cac7768d7141f}, + {0xd1ef0244af2364ff, 0x3207d795430cd927}, + {0x8335616aed761f1f, 0x7f44e6bd49e807b9}, + {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a7}, + {0xcd036837130890a1, 0x36dba887c37a8c10}, + {0x802221226be55a64, 0xc2494954da2c978a}, + {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6d}, + {0xc83553c5c8965d3d, 0x6f92829494e5acc8}, + {0xfa42a8b73abbf48c, 0xcb772339ba1f17fa}, + {0x9c69a97284b578d7, 0xff2a760414536efc}, + {0xc38413cf25e2d70d, 0xfef5138519684abb}, + {0xf46518c2ef5b8cd1, 0x7eb258665fc25d6a}, + {0x98bf2f79d5993802, 0xef2f773ffbd97a62}, + {0xbeeefb584aff8603, 0xaafb550ffacfd8fb}, + {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf39}, + {0x952ab45cfa97a0b2, 0xdd945a747bf26184}, + {0xba756174393d88df, 0x94f971119aeef9e5}, + {0xe912b9d1478ceb17, 0x7a37cd5601aab85e}, + {0x91abb422ccb812ee, 0xac62e055c10ab33b}, + {0xb616a12b7fe617aa, 0x577b986b314d600a}, + {0xe39c49765fdf9d94, 0xed5a7e85fda0b80c}, + {0x8e41ade9fbebc27d, 0x14588f13be847308}, + {0xb1d219647ae6b31c, 0x596eb2d8ae258fc9}, + {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bc}, + {0x8aec23d680043bee, 0x25de7bb9480d5855}, + {0xada72ccc20054ae9, 0xaf561aa79a10ae6b}, + {0xd910f7ff28069da4, 0x1b2ba1518094da05}, + {0x87aa9aff79042286, 0x90fb44d2f05d0843}, + {0xa99541bf57452b28, 0x353a1607ac744a54}, + {0xd3fa922f2d1675f2, 0x42889b8997915ce9}, + {0x847c9b5d7c2e09b7, 0x69956135febada12}, + {0xa59bc234db398c25, 0x43fab9837e699096}, + {0xcf02b2c21207ef2e, 0x94f967e45e03f4bc}, + {0x8161afb94b44f57d, 0x1d1be0eebac278f6}, + {0xa1ba1ba79e1632dc, 0x6462d92a69731733}, + {0xca28a291859bbf93, 0x7d7b8f7503cfdcff}, + {0xfcb2cb35e702af78, 0x5cda735244c3d43f}, + {0x9defbf01b061adab, 0x3a0888136afa64a8}, + {0xc56baec21c7a1916, 0x088aaa1845b8fdd1}, + {0xf6c69a72a3989f5b, 0x8aad549e57273d46}, + {0x9a3c2087a63f6399, 0x36ac54e2f678864c}, + {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7de}, + {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d6}, + {0x969eb7c47859e743, 0x9f644ae5a4b1b326}, + {0xbc4665b596706114, 0x873d5d9f0dde1fef}, + {0xeb57ff22fc0c7959, 0xa90cb506d155a7eb}, + {0x9316ff75dd87cbd8, 0x09a7f12442d588f3}, + {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb30}, + {0xe5d3ef282a242e81, 0x8f1668c8a86da5fb}, + {0x8fa475791a569d10, 0xf96e017d694487bd}, + {0xb38d92d760ec4455, 0x37c981dcc395a9ad}, + {0xe070f78d3927556a, 0x85bbe253f47b1418}, + {0x8c469ab843b89562, 0x93956d7478ccec8f}, + {0xaf58416654a6babb, 0x387ac8d1970027b3}, + {0xdb2e51bfe9d0696a, 0x06997b05fcc0319f}, + {0x88fcf317f22241e2, 0x441fece3bdf81f04}, + {0xab3c2fddeeaad25a, 0xd527e81cad7626c4}, + {0xd60b3bd56a5586f1, 0x8a71e223d8d3b075}, + {0x85c7056562757456, 0xf6872d5667844e4a}, + {0xa738c6bebb12d16c, 0xb428f8ac016561dc}, + {0xd106f86e69d785c7, 0xe13336d701beba53}, + {0x82a45b450226b39c, 0xecc0024661173474}, + {0xa34d721642b06084, 0x27f002d7f95d0191}, + {0xcc20ce9bd35c78a5, 0x31ec038df7b441f5}, + {0xff290242c83396ce, 0x7e67047175a15272}, + {0x9f79a169bd203e41, 0x0f0062c6e984d387}, + {0xc75809c42c684dd1, 0x52c07b78a3e60869}, + {0xf92e0c3537826145, 0xa7709a56ccdf8a83}, + {0x9bbcc7a142b17ccb, 0x88a66076400bb692}, + {0xc2abf989935ddbfe, 0x6acff893d00ea436}, + {0xf356f7ebf83552fe, 0x0583f6b8c4124d44}, + {0x98165af37b2153de, 0xc3727a337a8b704b}, + {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5d}, + {0xeda2ee1c7064130c, 0x1162def06f79df74}, + {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba9}, + {0xb9a74a0637ce2ee1, 0x6d953e2bd7173693}, + {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0438}, + {0x910ab1d4db9914a0, 0x1d9c9892400a22a3}, + 
{0xb54d5e4a127f59c8, 0x2503beb6d00cab4c}, + {0xe2a0b5dc971f303a, 0x2e44ae64840fd61e}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d3}, + {0xb10d8e1456105dad, 0x7425a83e872c5f48}, + {0xdd50f1996b947518, 0xd12f124e28f7771a}, + {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa70}, + {0xace73cbfdc0bfb7b, 0x636cc64d1001550c}, + {0xd8210befd30efa5a, 0x3c47f7e05401aa4f}, + {0x8714a775e3e95c78, 0x65acfaec34810a72}, + {0xa8d9d1535ce3b396, 0x7f1839a741a14d0e}, + {0xd31045a8341ca07c, 0x1ede48111209a051}, + {0x83ea2b892091e44d, 0x934aed0aab460433}, + {0xa4e4b66b68b65d60, 0xf81da84d56178540}, + {0xce1de40642e3f4b9, 0x36251260ab9d668f}, + {0x80d2ae83e9ce78f3, 0xc1d72b7c6b42601a}, + {0xa1075a24e4421730, 0xb24cf65b8612f820}, + {0xc94930ae1d529cfc, 0xdee033f26797b628}, + {0xfb9b7cd9a4a7443c, 0x169840ef017da3b2}, + {0x9d412e0806e88aa5, 0x8e1f289560ee864f}, + {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e3}, + {0xf5b5d7ec8acb58a2, 0xae10af696774b1dc}, + {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef2a}, + {0xbff610b0cc6edd3f, 0x17fd090a58d32af4}, + {0xeff394dcff8a948e, 0xddfc4b4cef07f5b1}, + {0x95f83d0a1fb69cd9, 0x4abdaf101564f98f}, + {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f2}, + {0xea53df5fd18d5513, 0x84c86189216dc5ee}, + {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb5}, + {0xb7118682dbb66a77, 0x3fbc8c33221dc2a2}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334b}, + {0x8f05b1163ba6832d, 0x29cb4d87f2a7400f}, + {0xb2c71d5bca9023f8, 0x743e20e9ef511013}, + {0xdf78e4b2bd342cf6, 0x914da9246b255417}, + {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548f}, + {0xae9672aba3d0c320, 0xa184ac2473b529b2}, + {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741f}, + {0x8865899617fb1871, 0x7e2fa67c7a658893}, + {0xaa7eebfb9df9de8d, 0xddbb901b98feeab8}, + {0xd51ea6fa85785631, 0x552a74227f3ea566}, + {0x8533285c936b35de, 0xd53a88958f872760}, + {0xa67ff273b8460356, 0x8a892abaf368f138}, + {0xd01fef10a657842c, 0x2d2b7569b0432d86}, + {0x8213f56a67f6b29b, 0x9c3b29620e29fc74}, + {0xa298f2c501f45f42, 0x8349f3ba91b47b90}, + {0xcb3f2f7642717713, 0x241c70a936219a74}, + {0xfe0efb53d30dd4d7, 0xed238cd383aa0111}, + {0x9ec95d1463e8a506, 0xf4363804324a40ab}, + {0xc67bb4597ce2ce48, 0xb143c6053edcd0d6}, + {0xf81aa16fdc1b81da, 0xdd94b7868e94050b}, + {0x9b10a4e5e9913128, 0xca7cf2b4191c8327}, + {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f1}, + {0xf24a01a73cf2dccf, 0xbc633b39673c8ced}, + {0x976e41088617ca01, 0xd5be0503e085d814}, + {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e19}, + {0xec9c459d51852ba2, 0xddf8e7d60ed1219f}, + {0x93e1ab8252f33b45, 0xcabb90e5c942b504}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936244}, + {0xe7109bfba19c0c9d, 0x0cc512670a783ad5}, + {0x906a617d450187e2, 0x27fb2b80668b24c6}, + {0xb484f9dc9641e9da, 0xb1f9f660802dedf7}, + {0xe1a63853bbd26451, 0x5e7873f8a0396974}, + {0x8d07e33455637eb2, 0xdb0b487b6423e1e9}, + {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda63}, + {0xdc5c5301c56b75f7, 0x7641a140cc7810fc}, + {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9e}, + {0xac2820d9623bf429, 0x546345fa9fbdcd45}, + {0xd732290fbacaf133, 0xa97c177947ad4096}, + {0x867f59a9d4bed6c0, 0x49ed8eabcccc485e}, + {0xa81f301449ee8c70, 0x5c68f256bfff5a75}, + {0xd226fc195c6a2f8c, 0x73832eec6fff3112}, + {0x83585d8fd9c25db7, 0xc831fd53c5ff7eac}, + {0xa42e74f3d032f525, 0xba3e7ca8b77f5e56}, + {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35ec}, + {0x80444b5e7aa7cf85, 0x7980d163cf5b81b4}, + {0xa0555e361951c366, 0xd7e105bcc3326220}, + {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa8}, + {0xfa856334878fc150, 0xb14f98f6f0feb952}, + {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d4}, + {0xc3b8358109e84f07, 0x0a862f80ec4700c9}, + {0xf4a642e14c6262c8, 0xcd27bb612758c0fb}, + {0x98e7e9cccfbd7dbd, 
0x8038d51cb897789d},
+        {0xbf21e44003acdd2c, 0xe0470a63e6bd56c4},
+        {0xeeea5d5004981478, 0x1858ccfce06cac75},
+        {0x95527a5202df0ccb, 0x0f37801e0c43ebc9},
+        {0xbaa718e68396cffd, 0xd30560258f54e6bb},
+        {0xe950df20247c83fd, 0x47c6b82ef32a206a},
+        {0x91d28b7416cdd27e, 0x4cdc331d57fa5442},
+        {0xb6472e511c81471d, 0xe0133fe4adf8e953},
+        {0xe3d8f9e563a198e5, 0x58180fddd97723a7},
+        {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7649},
+        {0xb201833b35d63f73, 0x2cd2cc6551e513db},
+        {0xde81e40a034bcf4f, 0xf8077f7ea65e58d2},
+        {0x8b112e86420f6191, 0xfb04afaf27faf783},
+        {0xadd57a27d29339f6, 0x79c5db9af1f9b564},
+        {0xd94ad8b1c7380874, 0x18375281ae7822bd},
+        {0x87cec76f1c830548, 0x8f2293910d0b15b6},
+        {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb23},
+        {0xd433179d9c8cb841, 0x5fa60692a46151ec},
+        {0x849feec281d7f328, 0xdbc7c41ba6bcd334},
+        {0xa5c7ea73224deff3, 0x12b9b522906c0801},
+        {0xcf39e50feae16bef, 0xd768226b34870a01},
+        {0x81842f29f2cce375, 0xe6a1158300d46641},
+        {0xa1e53af46f801c53, 0x60495ae3c1097fd1},
+        {0xca5e89b18b602368, 0x385bb19cb14bdfc5},
+        {0xfcf62c1dee382c42, 0x46729e03dd9ed7b6},
+        {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2},
+        {0xc5a05277621be293, 0xc7098b7305241886},
+        {0xf70867153aa2db38, 0xb8cbee4fc66d1ea8},
+        {0x9a65406d44a5c903, 0x737f74f1dc043329},
+        {0xc0fe908895cf3b44, 0x505f522e53053ff3},
+        {0xf13e34aabb430a15, 0x647726b9e7c68ff0},
+        {0x96c6e0eab509e64d, 0x5eca783430dc19f6},
+        {0xbc789925624c5fe0, 0xb67d16413d132073},
+        {0xeb96bf6ebadf77d8, 0xe41c5bd18c57e890},
+        {0x933e37a534cbaae7, 0x8e91b962f7b6f15a},
+        {0xb80dc58e81fe95a1, 0x723627bbb5a4adb1},
+        {0xe61136f2227e3b09, 0xcec3b1aaa30dd91d},
+        {0x8fcac257558ee4e6, 0x213a4f0aa5e8a7b2},
+        {0xb3bd72ed2af29e1f, 0xa988e2cd4f62d19e},
+        {0xe0accfa875af45a7, 0x93eb1b80a33b8606},
+        {0x8c6c01c9498d8b88, 0xbc72f130660533c4},
+        {0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5},
+        { 0xdb68c2ca82ed2a05,
+          0xa67398db9f6820e2 }
+#else
+        {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
+        {0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
+        {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f},
+        {0x86a8d39ef77164bc, 0xae5dff9c02033198},
+        {0xd98ddaee19068c76, 0x3badd624dd9b0958},
+        {0xafbd2350644eeacf, 0xe5d1929ef90898fb},
+        {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2},
+        {0xe55990879ddcaabd, 0xcc420a6a101d0516},
+        {0xb94470938fa89bce, 0xf808e40e8d5b3e6a},
+        {0x95a8637627989aad, 0xdde7001379a44aa9},
+        {0xf1c90080baf72cb1, 0x5324c68b12dd6339},
+        {0xc350000000000000, 0x0000000000000000},
+        {0x9dc5ada82b70b59d, 0xf020000000000000},
+        {0xfee50b7025c36a08, 0x02f236d04753d5b5},
+        {0xcde6fd5e09abcf26, 0xed4c0226b55e6f87},
+        {0xa6539930bf6bff45, 0x84db8346b786151d},
+        {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b3},
+        {0xd910f7ff28069da4, 0x1b2ba1518094da05},
+        {0xaf58416654a6babb, 0x387ac8d1970027b3},
+        {0x8da471a9de737e24, 0x5ceaecfed289e5d3},
+        {0xe4d5e82392a40515, 0x0fabaf3feaa5334b},
+        {0xb8da1662e7b00a17, 0x3d6a751f3b936244},
+        {0x95527a5202df0ccb, 0x0f37801e0c43ebc9},
+        {0xf13e34aabb430a15, 0x647726b9e7c68ff0}
+#endif
+    };
+
+#if FMT_USE_FULL_CACHE_DRAGONBOX
+    return pow10_significands[k - float_info<double>::min_k];
+#else
+    static constexpr const uint64_t powers_of_5_64[] = {
+        0x0000000000000001, 0x0000000000000005, 0x0000000000000019,
+        0x000000000000007d, 0x0000000000000271, 0x0000000000000c35,
+        0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1,
+        0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd,
+        0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9,
+        0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5,
+        0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631,
+        0x0001b1ae4d6e2ef5,
0x000878678326eac9, 0x002a5a058fc295ed,
+        0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9};
+
+    static const int compression_ratio = 27;
+
+    // Compute base index.
+    int cache_index = (k - float_info<double>::min_k) / compression_ratio;
+    int kb = cache_index * compression_ratio + float_info<double>::min_k;
+    int offset = k - kb;
+
+    // Get base cache.
+    uint128_fallback base_cache = pow10_significands[cache_index];
+    if (offset == 0) return base_cache;
+
+    // Compute the required amount of bit-shift.
+    int alpha = floor_log2_pow10(kb + offset) - floor_log2_pow10(kb) - offset;
+    FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected");
+
+    // Try to recover the real cache.
+    uint64_t pow5 = powers_of_5_64[offset];
+    uint128_fallback recovered_cache = umul128(base_cache.high(), pow5);
+    uint128_fallback middle_low = umul128(base_cache.low(), pow5);
+
+    recovered_cache += middle_low.high();
+
+    uint64_t high_to_middle = recovered_cache.high() << (64 - alpha);
+    uint64_t middle_to_low = recovered_cache.low() << (64 - alpha);
+
+    recovered_cache =
+        uint128_fallback{(recovered_cache.low() >> alpha) | high_to_middle,
+                         ((middle_low.low() >> alpha) | middle_to_low)};
+    FMT_ASSERT(recovered_cache.low() + 1 != 0, "");
+    return {recovered_cache.high(), recovered_cache.low() + 1};
+#endif
+  }
+
+  struct compute_mul_result {
+    carrier_uint result;
+    bool is_integer;
+  };
+  struct compute_mul_parity_result {
+    bool parity;
+    bool is_integer;
+  };
+
+  static compute_mul_result compute_mul(
+      carrier_uint u, const cache_entry_type& cache) noexcept {
+    auto r = umul192_upper128(u, cache);
+    return {r.high(), r.low() == 0};
+  }
+
+  static uint32_t compute_delta(cache_entry_type const& cache,
+                                int beta) noexcept {
+    return static_cast<uint32_t>(cache.high() >> (64 - 1 - beta));
+  }
+
+  static compute_mul_parity_result compute_mul_parity(
+      carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept {
+    FMT_ASSERT(beta >= 1, "");
+    FMT_ASSERT(beta < 64, "");
+
+    auto r = umul192_lower128(two_f, cache);
+    return {((r.high() >> (64 - beta)) & 1) != 0,
+            ((r.high() << beta) | (r.low() >> (64 - beta))) == 0};
+  }
+
+  static carrier_uint compute_left_endpoint_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept {
+    return (cache.high() -
+            (cache.high() >> (num_significand_bits<double>() + 2))) >>
+           (64 - num_significand_bits<double>() - 1 - beta);
+  }
+
+  static carrier_uint compute_right_endpoint_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept {
+    return (cache.high() +
+            (cache.high() >> (num_significand_bits<double>() + 1))) >>
+           (64 - num_significand_bits<double>() - 1 - beta);
+  }
+
+  static carrier_uint compute_round_up_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept {
+    return ((cache.high() >>
+             (64 - num_significand_bits<double>() - 2 - beta)) +
+            1) /
+           2;
+  }
+};
+
+FMT_FUNC uint128_fallback get_cached_power(int k) noexcept {
+  return cache_accessor<double>::get_cached_power(k);
+}
+
+// Various integer checks
+template <typename T>
+bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept {
+  const int case_shorter_interval_left_endpoint_lower_threshold = 2;
+  const int case_shorter_interval_left_endpoint_upper_threshold = 3;
+  return exponent >= case_shorter_interval_left_endpoint_lower_threshold &&
+         exponent <= case_shorter_interval_left_endpoint_upper_threshold;
+}
+
+// Remove trailing zeros from n and return the number of zeros removed (float)
+FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept {
+  FMT_ASSERT(n != 0, "");
+  // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1.
+  constexpr uint32_t mod_inv_5 = 0xcccccccd;
+  constexpr uint32_t mod_inv_25 = 0xc28f5c29;  // = mod_inv_5 * mod_inv_5
+
+  while (true) {
+    auto q = rotr(n * mod_inv_25, 2);
+    if (q > max_value<uint32_t>() / 100) break;
+    n = q;
+    s += 2;
+  }
+  auto q = rotr(n * mod_inv_5, 1);
+  if (q <= max_value<uint32_t>() / 10) {
+    n = q;
+    s |= 1;
+  }
+  return s;
+}
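The rotate trick above relies on a known property: multiplying by the modular inverse of 5 and rotating right by one maps multiples of 10 into [0, floor(2^32/10)] (yielding the exact quotient) and everything else above that bound. A minimal standalone check, with a local rotr32 helper standing in for the header's rotr (the 64-bit path works the same way, after first peeling off a 10^8 factor):

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t x, int r) {  // r in (0, 32)
  return (x >> r) | (x << (32 - r));
}

int main() {
  constexpr uint32_t mod_inv_5 = 0xcccccccd;
  assert(mod_inv_5 * 5u == 1u);  // inverse of 5 modulo 2^32
  for (uint32_t n : {10u, 250u, 123450u, 7u, 123u}) {
    uint32_t q = rotr32(n * mod_inv_5, 1);
    if (n % 10 == 0)
      assert(q == n / 10 && q <= UINT32_MAX / 10);  // divisible: quotient
    else
      assert(q > UINT32_MAX / 10);  // not divisible: pushed out of range
  }
  return 0;
}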
+
+// Removes trailing zeros and returns the number of zeros removed (double)
+FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept {
+  FMT_ASSERT(n != 0, "");
+
+  // This magic number is ceil(2^90 / 10^8).
+  constexpr uint64_t magic_number = 12379400392853802749ull;
+  auto nm = umul128(n, magic_number);
+
+  // Is n divisible by 10^8?
+  if ((nm.high() & ((1ull << (90 - 64)) - 1)) == 0 && nm.low() < magic_number) {
+    // If yes, work with the quotient...
+    auto n32 = static_cast<uint32_t>(nm.high() >> (90 - 64));
+    // ... and use the 32 bit variant of the function
+    int s = remove_trailing_zeros(n32, 8);
+    n = n32;
+    return s;
+  }
+
+  // If n is not divisible by 10^8, work with n itself.
+  constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd;
+  constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29;  // = mod_inv_5^2
+
+  int s = 0;
+  while (true) {
+    auto q = rotr(n * mod_inv_25, 2);
+    if (q > max_value<uint64_t>() / 100) break;
+    n = q;
+    s += 2;
+  }
+  auto q = rotr(n * mod_inv_5, 1);
+  if (q <= max_value<uint64_t>() / 10) {
+    n = q;
+    s |= 1;
+  }
+
+  return s;
+}
+
+// The main algorithm for shorter interval case
+template <typename T>
+FMT_INLINE decimal_fp<T> shorter_interval_case(int exponent) noexcept {
+  decimal_fp<T> ret_value;
+  // Compute k and beta
+  const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent);
+  const int beta = exponent + floor_log2_pow10(-minus_k);
+
+  // Compute xi and zi
+  using cache_entry_type = typename cache_accessor<T>::cache_entry_type;
+  const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
+
+  auto xi = cache_accessor<T>::compute_left_endpoint_for_shorter_interval_case(
+      cache, beta);
+  auto zi = cache_accessor<T>::compute_right_endpoint_for_shorter_interval_case(
+      cache, beta);
+
+  // If the left endpoint is not an integer, increase it
+  if (!is_left_endpoint_integer_shorter_interval<T>(exponent)) ++xi;
+
+  // Try the bigger divisor
+  ret_value.significand = zi / 10;
+
+  // If that succeeds, remove trailing zeros if necessary and return
+  if (ret_value.significand * 10 >= xi) {
+    ret_value.exponent = minus_k + 1;
+    ret_value.exponent += remove_trailing_zeros(ret_value.significand);
+    return ret_value;
+  }
+
+  // Otherwise, compute the round-up of y
+  ret_value.significand =
+      cache_accessor<T>::compute_round_up_for_shorter_interval_case(cache,
+                                                                    beta);
+  ret_value.exponent = minus_k;
+
+  // When a tie occurs, choose one of them according to the rule
+  if (exponent >= float_info<T>::shorter_interval_tie_lower_threshold &&
+      exponent <= float_info<T>::shorter_interval_tie_upper_threshold) {
+    ret_value.significand = ret_value.significand % 2 == 0
+                                ? ret_value.significand
+                                : ret_value.significand - 1;
+  } else if (ret_value.significand < xi) {
+    ++ret_value.significand;
+  }
+  return ret_value;
+}
+
+template <typename T> decimal_fp<T> to_decimal(T x) noexcept {
+  // Step 1: integer promotion & Schubfach multiplier calculation.
+
+  using carrier_uint = typename float_info<T>::carrier_uint;
+  using cache_entry_type = typename cache_accessor<T>::cache_entry_type;
+  auto br = bit_cast<carrier_uint>(x);
+
+  // Extract significand bits and exponent bits.
+  const carrier_uint significand_mask =
+      (static_cast<carrier_uint>(1) << num_significand_bits<T>()) - 1;
+  carrier_uint significand = (br & significand_mask);
+  int exponent =
+      static_cast<int>((br & exponent_mask<T>()) >> num_significand_bits<T>());
+
+  if (exponent != 0) {  // Check if normal.
+    exponent -= exponent_bias<T>() + num_significand_bits<T>();
+
+    // Shorter interval case; proceed like Schubfach.
+    // In fact, when exponent == 1 and significand == 0, the interval is
+    // regular. However, it can be shown that the end results are the same
+    // either way.
+    if (significand == 0) return shorter_interval_case<T>(exponent);
+
+    significand |= (static_cast<carrier_uint>(1) << num_significand_bits<T>());
+  } else {
+    // Subnormal case; the interval is always regular.
+    if (significand == 0) return {0, 0};
+    exponent =
+        std::numeric_limits<T>::min_exponent - num_significand_bits<T>() - 1;
+  }
+
+  const bool include_left_endpoint = (significand % 2 == 0);
+  const bool include_right_endpoint = include_left_endpoint;
+
+  // Compute k and beta.
+  const int minus_k = floor_log10_pow2(exponent) - float_info<T>::kappa;
+  const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
+  const int beta = exponent + floor_log2_pow10(-minus_k);
+
+  // Compute zi and deltai.
+  // 10^kappa <= deltai < 10^(kappa + 1)
+  const uint32_t deltai = cache_accessor<T>::compute_delta(cache, beta);
+  const carrier_uint two_fc = significand << 1;
+
+  // For the case of binary32, the result of the integer check is not correct
+  // for
+  //   29711844 * 2^-82
+  // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18
+  // and
+  //   29711844 * 2^-81
+  // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17,
+  // and they are the unique counterexamples. However, since 29711844 is even,
+  // this does not cause any problem for the endpoint calculations; it can only
+  // cause a problem when we need to perform an integer check for the center.
+  // Fortunately, with these inputs, that branch is never executed, so we are
+  // fine.
+  const typename cache_accessor<T>::compute_mul_result z_mul =
+      cache_accessor<T>::compute_mul((two_fc | 1) << beta, cache);
+
+  // Step 2: Try the larger divisor; remove trailing zeros if necessary.
+
+  // Using an upper bound on zi, we might be able to optimize the division
+  // better than the compiler; we are computing zi / big_divisor here.
+  decimal_fp<T> ret_value;
+  ret_value.significand = divide_by_10_to_kappa_plus_1(z_mul.result);
+  uint32_t r = static_cast<uint32_t>(z_mul.result -
+                                     float_info<T>::big_divisor *
+                                         ret_value.significand);
+
+  if (r < deltai) {
+    // Exclude the right endpoint if necessary.
+    if (r == 0 && (z_mul.is_integer & !include_right_endpoint)) {
+      --ret_value.significand;
+      r = float_info<T>::big_divisor;
+      goto small_divisor_case_label;
+    }
+  } else if (r > deltai) {
+    goto small_divisor_case_label;
+  } else {
+    // r == deltai; compare fractional parts.
+    const typename cache_accessor<T>::compute_mul_parity_result x_mul =
+        cache_accessor<T>::compute_mul_parity(two_fc - 1, cache, beta);
+
+    if (!(x_mul.parity | (x_mul.is_integer & include_left_endpoint)))
+      goto small_divisor_case_label;
+  }
+  ret_value.exponent = minus_k + float_info<T>::kappa + 1;
+
+  // We may need to remove trailing zeros.
+  ret_value.exponent += remove_trailing_zeros(ret_value.significand);
+  return ret_value;
+
+  // Step 3: Find the significand with the smaller divisor.
+
+small_divisor_case_label:
+  ret_value.significand *= 10;
+  ret_value.exponent = minus_k + float_info<T>::kappa;
+
+  uint32_t dist = r - (deltai / 2) + (float_info<T>::small_divisor / 2);
+  const bool approx_y_parity =
+      ((dist ^ (float_info<T>::small_divisor / 2)) & 1) != 0;
+
+  // Is dist divisible by 10^kappa?
+  const bool divisible_by_small_divisor =
+      check_divisibility_and_divide_by_pow10<float_info<T>::kappa>(dist);
+
+  // Add dist / 10^kappa to the significand.
+  ret_value.significand += dist;
+
+  if (!divisible_by_small_divisor) return ret_value;
+
+  // Check z^(f) >= epsilon^(f).
+  // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1,
+  // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f).
+  // Since there are only 2 possibilities, we only need to care about the
+  // parity. Also, zi and r should have the same parity since the divisor
+  // is an even number.
+  const auto y_mul = cache_accessor<T>::compute_mul_parity(two_fc, cache, beta);
+
+  // If z^(f) >= epsilon^(f), we might have a tie when z^(f) == epsilon^(f),
+  // or equivalently, when y is an integer.
+  if (y_mul.parity != approx_y_parity)
+    --ret_value.significand;
+  else if (y_mul.is_integer & (ret_value.significand % 2 != 0))
+    --ret_value.significand;
+  return ret_value;
+}
+}  // namespace dragonbox
+}  // namespace detail
+
+template <> struct formatter<detail::bigint> {
+  FMT_CONSTEXPR auto parse(format_parse_context& ctx)
+      -> format_parse_context::iterator {
+    return ctx.begin();
+  }
+
+  auto format(const detail::bigint& n, format_context& ctx) const
+      -> format_context::iterator {
+    auto out = ctx.out();
+    bool first = true;
+    for (auto i = n.bigits_.size(); i > 0; --i) {
+      auto value = n.bigits_[i - 1u];
+      if (first) {
+        out = format_to(out, FMT_STRING("{:x}"), value);
+        first = false;
+        continue;
+      }
+      out = format_to(out, FMT_STRING("{:08x}"), value);
+    }
+    if (n.exp_ > 0)
+      out = format_to(out, FMT_STRING("p{}"),
+                      n.exp_ * detail::bigint::bigit_bits);
+    return out;
+  }
+};
+
+FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) {
+  for_each_codepoint(s, [this](uint32_t cp, string_view) {
+    if (cp == invalid_code_point) FMT_THROW(std::runtime_error("invalid utf8"));
+    if (cp <= 0xFFFF) {
+      buffer_.push_back(static_cast<wchar_t>(cp));
+    } else {
+      cp -= 0x10000;
+      buffer_.push_back(static_cast<wchar_t>(0xD800 + (cp >> 10)));
+      buffer_.push_back(static_cast<wchar_t>(0xDC00 + (cp & 0x3FF)));
+    }
+    return true;
+  });
+  buffer_.push_back(0);
+}
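The surrogate-pair arithmetic in utf8_to_utf16 above follows directly from the UTF-16 encoding rules: subtract 0x10000, then split the 20-bit offset into a high and a low 10-bit half. A minimal standalone check (not part of the vendored sources):

#include <cassert>
#include <cstdint>

int main() {
  // Encode U+1F600 (above the BMP) as a UTF-16 surrogate pair.
  uint32_t cp = 0x1F600;
  cp -= 0x10000;  // 20-bit offset: 0xF600
  uint16_t high = static_cast<uint16_t>(0xD800 + (cp >> 10));   // 0xD83D
  uint16_t low = static_cast<uint16_t>(0xDC00 + (cp & 0x3FF));  // 0xDE00
  assert(high == 0xD83D && low == 0xDE00);
  // Decoding inverts the steps.
  uint32_t decoded =
      0x10000 + ((static_cast<uint32_t>(high - 0xD800) << 10) | (low - 0xDC00));
  assert(decoded == 0x1F600);
  return 0;
}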
+
+FMT_FUNC void format_system_error(detail::buffer<char>& out, int error_code,
+                                  const char* message) noexcept {
+  FMT_TRY {
+    auto ec = std::error_code(error_code, std::generic_category());
+    write(std::back_inserter(out), std::system_error(ec, message).what());
+    return;
+  }
+  FMT_CATCH(...) {}
+  format_error_code(out, error_code, message);
+}
+
+FMT_FUNC void report_system_error(int error_code,
+                                  const char* message) noexcept {
+  report_error(format_system_error, error_code, message);
+}
+
+FMT_FUNC std::string vformat(string_view fmt, format_args args) {
+  // Don't optimize the "{}" case to keep the binary size small and because it
+  // can be better optimized in fmt::format anyway.
+  auto buffer = memory_buffer();
+  detail::vformat_to(buffer, fmt, args);
+  return to_string(buffer);
+}
+
+namespace detail {
+#ifndef _WIN32
+FMT_FUNC bool write_console(std::FILE*, string_view) { return false; }
+#else
+using dword = conditional_t<sizeof(long) == 4, unsigned long, unsigned>;
+extern "C" __declspec(dllimport) int __stdcall WriteConsoleW(  //
+    void*, const void*, dword, dword*, void*);
+
+FMT_FUNC bool write_console(std::FILE* f, string_view text) {
+  auto fd = _fileno(f);
+  if (!_isatty(fd)) return false;
+  auto u16 = utf8_to_utf16(text);
+  auto written = dword();
+  return WriteConsoleW(reinterpret_cast<void*>(_get_osfhandle(fd)),
+                       u16.c_str(), static_cast<dword>(u16.size()), &written,
+                       nullptr) != 0;
+}
+
+// Print assuming legacy (non-Unicode) encoding.
+FMT_FUNC void vprint_mojibake(std::FILE* f, string_view fmt, format_args args) {
+  auto buffer = memory_buffer();
+  detail::vformat_to(buffer, fmt,
+                     basic_format_args<buffer_context<char>>(args));
+  fwrite_fully(buffer.data(), 1, buffer.size(), f);
+}
+#endif
+
+FMT_FUNC void print(std::FILE* f, string_view text) {
+  if (!write_console(f, text)) fwrite_fully(text.data(), 1, text.size(), f);
+}
+}  // namespace detail
+
+FMT_FUNC void vprint(std::FILE* f, string_view fmt, format_args args) {
+  auto buffer = memory_buffer();
+  detail::vformat_to(buffer, fmt, args);
+  detail::print(f, {buffer.data(), buffer.size()});
+}
+
+FMT_FUNC void vprint(string_view fmt, format_args args) {
+  vprint(stdout, fmt, args);
+}
+
+namespace detail {
+
+struct singleton {
+  unsigned char upper;
+  unsigned char lower_count;
+};
+
+inline auto is_printable(uint16_t x, const singleton* singletons,
+                         size_t singletons_size,
+                         const unsigned char* singleton_lowers,
+                         const unsigned char* normal, size_t normal_size)
+    -> bool {
+  auto upper = x >> 8;
+  auto lower_start = 0;
+  for (size_t i = 0; i < singletons_size; ++i) {
+    auto s = singletons[i];
+    auto lower_end = lower_start + s.lower_count;
+    if (upper < s.upper) break;
+    if (upper == s.upper) {
+      for (auto j = lower_start; j < lower_end; ++j) {
+        if (singleton_lowers[j] == (x & 0xff)) return false;
+      }
+    }
+    lower_start = lower_end;
+  }
+
+  auto xsigned = static_cast<int>(x);
+  auto current = true;
+  for (size_t i = 0; i < normal_size; ++i) {
+    auto v = static_cast<int>(normal[i]);
+    auto len = (v & 0x80) != 0 ? (v & 0x7f) << 8 | normal[++i] : v;
+    xsigned -= len;
+    if (xsigned < 0) break;
+    current = !current;
+  }
+  return current;
+}
+
+// This code is generated by support/printable.py.
+FMT_FUNC auto is_printable(uint32_t cp) -> bool { + static constexpr singleton singletons0[] = { + {0x00, 1}, {0x03, 5}, {0x05, 6}, {0x06, 3}, {0x07, 6}, {0x08, 8}, + {0x09, 17}, {0x0a, 28}, {0x0b, 25}, {0x0c, 20}, {0x0d, 16}, {0x0e, 13}, + {0x0f, 4}, {0x10, 3}, {0x12, 18}, {0x13, 9}, {0x16, 1}, {0x17, 5}, + {0x18, 2}, {0x19, 3}, {0x1a, 7}, {0x1c, 2}, {0x1d, 1}, {0x1f, 22}, + {0x20, 3}, {0x2b, 3}, {0x2c, 2}, {0x2d, 11}, {0x2e, 1}, {0x30, 3}, + {0x31, 2}, {0x32, 1}, {0xa7, 2}, {0xa9, 2}, {0xaa, 4}, {0xab, 8}, + {0xfa, 2}, {0xfb, 5}, {0xfd, 4}, {0xfe, 3}, {0xff, 9}, + }; + static constexpr unsigned char singletons0_lower[] = { + 0xad, 0x78, 0x79, 0x8b, 0x8d, 0xa2, 0x30, 0x57, 0x58, 0x8b, 0x8c, 0x90, + 0x1c, 0x1d, 0xdd, 0x0e, 0x0f, 0x4b, 0x4c, 0xfb, 0xfc, 0x2e, 0x2f, 0x3f, + 0x5c, 0x5d, 0x5f, 0xb5, 0xe2, 0x84, 0x8d, 0x8e, 0x91, 0x92, 0xa9, 0xb1, + 0xba, 0xbb, 0xc5, 0xc6, 0xc9, 0xca, 0xde, 0xe4, 0xe5, 0xff, 0x00, 0x04, + 0x11, 0x12, 0x29, 0x31, 0x34, 0x37, 0x3a, 0x3b, 0x3d, 0x49, 0x4a, 0x5d, + 0x84, 0x8e, 0x92, 0xa9, 0xb1, 0xb4, 0xba, 0xbb, 0xc6, 0xca, 0xce, 0xcf, + 0xe4, 0xe5, 0x00, 0x04, 0x0d, 0x0e, 0x11, 0x12, 0x29, 0x31, 0x34, 0x3a, + 0x3b, 0x45, 0x46, 0x49, 0x4a, 0x5e, 0x64, 0x65, 0x84, 0x91, 0x9b, 0x9d, + 0xc9, 0xce, 0xcf, 0x0d, 0x11, 0x29, 0x45, 0x49, 0x57, 0x64, 0x65, 0x8d, + 0x91, 0xa9, 0xb4, 0xba, 0xbb, 0xc5, 0xc9, 0xdf, 0xe4, 0xe5, 0xf0, 0x0d, + 0x11, 0x45, 0x49, 0x64, 0x65, 0x80, 0x84, 0xb2, 0xbc, 0xbe, 0xbf, 0xd5, + 0xd7, 0xf0, 0xf1, 0x83, 0x85, 0x8b, 0xa4, 0xa6, 0xbe, 0xbf, 0xc5, 0xc7, + 0xce, 0xcf, 0xda, 0xdb, 0x48, 0x98, 0xbd, 0xcd, 0xc6, 0xce, 0xcf, 0x49, + 0x4e, 0x4f, 0x57, 0x59, 0x5e, 0x5f, 0x89, 0x8e, 0x8f, 0xb1, 0xb6, 0xb7, + 0xbf, 0xc1, 0xc6, 0xc7, 0xd7, 0x11, 0x16, 0x17, 0x5b, 0x5c, 0xf6, 0xf7, + 0xfe, 0xff, 0x80, 0x0d, 0x6d, 0x71, 0xde, 0xdf, 0x0e, 0x0f, 0x1f, 0x6e, + 0x6f, 0x1c, 0x1d, 0x5f, 0x7d, 0x7e, 0xae, 0xaf, 0xbb, 0xbc, 0xfa, 0x16, + 0x17, 0x1e, 0x1f, 0x46, 0x47, 0x4e, 0x4f, 0x58, 0x5a, 0x5c, 0x5e, 0x7e, + 0x7f, 0xb5, 0xc5, 0xd4, 0xd5, 0xdc, 0xf0, 0xf1, 0xf5, 0x72, 0x73, 0x8f, + 0x74, 0x75, 0x96, 0x2f, 0x5f, 0x26, 0x2e, 0x2f, 0xa7, 0xaf, 0xb7, 0xbf, + 0xc7, 0xcf, 0xd7, 0xdf, 0x9a, 0x40, 0x97, 0x98, 0x30, 0x8f, 0x1f, 0xc0, + 0xc1, 0xce, 0xff, 0x4e, 0x4f, 0x5a, 0x5b, 0x07, 0x08, 0x0f, 0x10, 0x27, + 0x2f, 0xee, 0xef, 0x6e, 0x6f, 0x37, 0x3d, 0x3f, 0x42, 0x45, 0x90, 0x91, + 0xfe, 0xff, 0x53, 0x67, 0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7, + 0xfe, 0xff, + }; + static constexpr singleton singletons1[] = { + {0x00, 6}, {0x01, 1}, {0x03, 1}, {0x04, 2}, {0x08, 8}, {0x09, 2}, + {0x0a, 5}, {0x0b, 2}, {0x0e, 4}, {0x10, 1}, {0x11, 2}, {0x12, 5}, + {0x13, 17}, {0x14, 1}, {0x15, 2}, {0x17, 2}, {0x19, 13}, {0x1c, 5}, + {0x1d, 8}, {0x24, 1}, {0x6a, 3}, {0x6b, 2}, {0xbc, 2}, {0xd1, 2}, + {0xd4, 12}, {0xd5, 9}, {0xd6, 2}, {0xd7, 2}, {0xda, 1}, {0xe0, 5}, + {0xe1, 2}, {0xe8, 2}, {0xee, 32}, {0xf0, 4}, {0xf8, 2}, {0xf9, 2}, + {0xfa, 2}, {0xfb, 1}, + }; + static constexpr unsigned char singletons1_lower[] = { + 0x0c, 0x27, 0x3b, 0x3e, 0x4e, 0x4f, 0x8f, 0x9e, 0x9e, 0x9f, 0x06, 0x07, + 0x09, 0x36, 0x3d, 0x3e, 0x56, 0xf3, 0xd0, 0xd1, 0x04, 0x14, 0x18, 0x36, + 0x37, 0x56, 0x57, 0x7f, 0xaa, 0xae, 0xaf, 0xbd, 0x35, 0xe0, 0x12, 0x87, + 0x89, 0x8e, 0x9e, 0x04, 0x0d, 0x0e, 0x11, 0x12, 0x29, 0x31, 0x34, 0x3a, + 0x45, 0x46, 0x49, 0x4a, 0x4e, 0x4f, 0x64, 0x65, 0x5c, 0xb6, 0xb7, 0x1b, + 0x1c, 0x07, 0x08, 0x0a, 0x0b, 0x14, 0x17, 0x36, 0x39, 0x3a, 0xa8, 0xa9, + 0xd8, 0xd9, 0x09, 0x37, 0x90, 0x91, 0xa8, 0x07, 0x0a, 0x3b, 0x3e, 0x66, + 0x69, 0x8f, 0x92, 0x6f, 0x5f, 0xee, 0xef, 0x5a, 0x62, 0x9a, 
0x9b, 0x27, + 0x28, 0x55, 0x9d, 0xa0, 0xa1, 0xa3, 0xa4, 0xa7, 0xa8, 0xad, 0xba, 0xbc, + 0xc4, 0x06, 0x0b, 0x0c, 0x15, 0x1d, 0x3a, 0x3f, 0x45, 0x51, 0xa6, 0xa7, + 0xcc, 0xcd, 0xa0, 0x07, 0x19, 0x1a, 0x22, 0x25, 0x3e, 0x3f, 0xc5, 0xc6, + 0x04, 0x20, 0x23, 0x25, 0x26, 0x28, 0x33, 0x38, 0x3a, 0x48, 0x4a, 0x4c, + 0x50, 0x53, 0x55, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x63, 0x65, 0x66, + 0x6b, 0x73, 0x78, 0x7d, 0x7f, 0x8a, 0xa4, 0xaa, 0xaf, 0xb0, 0xc0, 0xd0, + 0xae, 0xaf, 0x79, 0xcc, 0x6e, 0x6f, 0x93, + }; + static constexpr unsigned char normal0[] = { + 0x00, 0x20, 0x5f, 0x22, 0x82, 0xdf, 0x04, 0x82, 0x44, 0x08, 0x1b, 0x04, + 0x06, 0x11, 0x81, 0xac, 0x0e, 0x80, 0xab, 0x35, 0x28, 0x0b, 0x80, 0xe0, + 0x03, 0x19, 0x08, 0x01, 0x04, 0x2f, 0x04, 0x34, 0x04, 0x07, 0x03, 0x01, + 0x07, 0x06, 0x07, 0x11, 0x0a, 0x50, 0x0f, 0x12, 0x07, 0x55, 0x07, 0x03, + 0x04, 0x1c, 0x0a, 0x09, 0x03, 0x08, 0x03, 0x07, 0x03, 0x02, 0x03, 0x03, + 0x03, 0x0c, 0x04, 0x05, 0x03, 0x0b, 0x06, 0x01, 0x0e, 0x15, 0x05, 0x3a, + 0x03, 0x11, 0x07, 0x06, 0x05, 0x10, 0x07, 0x57, 0x07, 0x02, 0x07, 0x15, + 0x0d, 0x50, 0x04, 0x43, 0x03, 0x2d, 0x03, 0x01, 0x04, 0x11, 0x06, 0x0f, + 0x0c, 0x3a, 0x04, 0x1d, 0x25, 0x5f, 0x20, 0x6d, 0x04, 0x6a, 0x25, 0x80, + 0xc8, 0x05, 0x82, 0xb0, 0x03, 0x1a, 0x06, 0x82, 0xfd, 0x03, 0x59, 0x07, + 0x15, 0x0b, 0x17, 0x09, 0x14, 0x0c, 0x14, 0x0c, 0x6a, 0x06, 0x0a, 0x06, + 0x1a, 0x06, 0x59, 0x07, 0x2b, 0x05, 0x46, 0x0a, 0x2c, 0x04, 0x0c, 0x04, + 0x01, 0x03, 0x31, 0x0b, 0x2c, 0x04, 0x1a, 0x06, 0x0b, 0x03, 0x80, 0xac, + 0x06, 0x0a, 0x06, 0x21, 0x3f, 0x4c, 0x04, 0x2d, 0x03, 0x74, 0x08, 0x3c, + 0x03, 0x0f, 0x03, 0x3c, 0x07, 0x38, 0x08, 0x2b, 0x05, 0x82, 0xff, 0x11, + 0x18, 0x08, 0x2f, 0x11, 0x2d, 0x03, 0x20, 0x10, 0x21, 0x0f, 0x80, 0x8c, + 0x04, 0x82, 0x97, 0x19, 0x0b, 0x15, 0x88, 0x94, 0x05, 0x2f, 0x05, 0x3b, + 0x07, 0x02, 0x0e, 0x18, 0x09, 0x80, 0xb3, 0x2d, 0x74, 0x0c, 0x80, 0xd6, + 0x1a, 0x0c, 0x05, 0x80, 0xff, 0x05, 0x80, 0xdf, 0x0c, 0xee, 0x0d, 0x03, + 0x84, 0x8d, 0x03, 0x37, 0x09, 0x81, 0x5c, 0x14, 0x80, 0xb8, 0x08, 0x80, + 0xcb, 0x2a, 0x38, 0x03, 0x0a, 0x06, 0x38, 0x08, 0x46, 0x08, 0x0c, 0x06, + 0x74, 0x0b, 0x1e, 0x03, 0x5a, 0x04, 0x59, 0x09, 0x80, 0x83, 0x18, 0x1c, + 0x0a, 0x16, 0x09, 0x4c, 0x04, 0x80, 0x8a, 0x06, 0xab, 0xa4, 0x0c, 0x17, + 0x04, 0x31, 0xa1, 0x04, 0x81, 0xda, 0x26, 0x07, 0x0c, 0x05, 0x05, 0x80, + 0xa5, 0x11, 0x81, 0x6d, 0x10, 0x78, 0x28, 0x2a, 0x06, 0x4c, 0x04, 0x80, + 0x8d, 0x04, 0x80, 0xbe, 0x03, 0x1b, 0x03, 0x0f, 0x0d, + }; + static constexpr unsigned char normal1[] = { + 0x5e, 0x22, 0x7b, 0x05, 0x03, 0x04, 0x2d, 0x03, 0x66, 0x03, 0x01, 0x2f, + 0x2e, 0x80, 0x82, 0x1d, 0x03, 0x31, 0x0f, 0x1c, 0x04, 0x24, 0x09, 0x1e, + 0x05, 0x2b, 0x05, 0x44, 0x04, 0x0e, 0x2a, 0x80, 0xaa, 0x06, 0x24, 0x04, + 0x24, 0x04, 0x28, 0x08, 0x34, 0x0b, 0x01, 0x80, 0x90, 0x81, 0x37, 0x09, + 0x16, 0x0a, 0x08, 0x80, 0x98, 0x39, 0x03, 0x63, 0x08, 0x09, 0x30, 0x16, + 0x05, 0x21, 0x03, 0x1b, 0x05, 0x01, 0x40, 0x38, 0x04, 0x4b, 0x05, 0x2f, + 0x04, 0x0a, 0x07, 0x09, 0x07, 0x40, 0x20, 0x27, 0x04, 0x0c, 0x09, 0x36, + 0x03, 0x3a, 0x05, 0x1a, 0x07, 0x04, 0x0c, 0x07, 0x50, 0x49, 0x37, 0x33, + 0x0d, 0x33, 0x07, 0x2e, 0x08, 0x0a, 0x81, 0x26, 0x52, 0x4e, 0x28, 0x08, + 0x2a, 0x56, 0x1c, 0x14, 0x17, 0x09, 0x4e, 0x04, 0x1e, 0x0f, 0x43, 0x0e, + 0x19, 0x07, 0x0a, 0x06, 0x48, 0x08, 0x27, 0x09, 0x75, 0x0b, 0x3f, 0x41, + 0x2a, 0x06, 0x3b, 0x05, 0x0a, 0x06, 0x51, 0x06, 0x01, 0x05, 0x10, 0x03, + 0x05, 0x80, 0x8b, 0x62, 0x1e, 0x48, 0x08, 0x0a, 0x80, 0xa6, 0x5e, 0x22, + 0x45, 0x0b, 0x0a, 0x06, 0x0d, 0x13, 0x39, 0x07, 0x0a, 0x36, 0x2c, 0x04, + 0x10, 
0x80, 0xc0, 0x3c, 0x64, 0x53, 0x0c, 0x48, 0x09, 0x0a, 0x46, 0x45,
+      0x1b, 0x48, 0x08, 0x53, 0x1d, 0x39, 0x81, 0x07, 0x46, 0x0a, 0x1d, 0x03,
+      0x47, 0x49, 0x37, 0x03, 0x0e, 0x08, 0x0a, 0x06, 0x39, 0x07, 0x0a, 0x81,
+      0x36, 0x19, 0x80, 0xb7, 0x01, 0x0f, 0x32, 0x0d, 0x83, 0x9b, 0x66, 0x75,
+      0x0b, 0x80, 0xc4, 0x8a, 0xbc, 0x84, 0x2f, 0x8f, 0xd1, 0x82, 0x47, 0xa1,
+      0xb9, 0x82, 0x39, 0x07, 0x2a, 0x04, 0x02, 0x60, 0x26, 0x0a, 0x46, 0x0a,
+      0x28, 0x05, 0x13, 0x82, 0xb0, 0x5b, 0x65, 0x4b, 0x04, 0x39, 0x07, 0x11,
+      0x40, 0x05, 0x0b, 0x02, 0x0e, 0x97, 0xf8, 0x08, 0x84, 0xd6, 0x2a, 0x09,
+      0xa2, 0xf7, 0x81, 0x1f, 0x31, 0x03, 0x11, 0x04, 0x08, 0x81, 0x8c, 0x89,
+      0x04, 0x6b, 0x05, 0x0d, 0x03, 0x09, 0x07, 0x10, 0x93, 0x60, 0x80, 0xf6,
+      0x0a, 0x73, 0x08, 0x6e, 0x17, 0x46, 0x80, 0x9a, 0x14, 0x0c, 0x57, 0x09,
+      0x19, 0x80, 0x87, 0x81, 0x47, 0x03, 0x85, 0x42, 0x0f, 0x15, 0x85, 0x50,
+      0x2b, 0x80, 0xd5, 0x2d, 0x03, 0x1a, 0x04, 0x02, 0x81, 0x70, 0x3a, 0x05,
+      0x01, 0x85, 0x00, 0x80, 0xd7, 0x29, 0x4c, 0x04, 0x0a, 0x04, 0x02, 0x83,
+      0x11, 0x44, 0x4c, 0x3d, 0x80, 0xc2, 0x3c, 0x06, 0x01, 0x04, 0x55, 0x05,
+      0x1b, 0x34, 0x02, 0x81, 0x0e, 0x2c, 0x04, 0x64, 0x0c, 0x56, 0x0a, 0x80,
+      0xae, 0x38, 0x1d, 0x0d, 0x2c, 0x04, 0x09, 0x07, 0x02, 0x0e, 0x06, 0x80,
+      0x9a, 0x83, 0xd8, 0x08, 0x0d, 0x03, 0x0d, 0x03, 0x74, 0x0c, 0x59, 0x07,
+      0x0c, 0x14, 0x0c, 0x04, 0x38, 0x08, 0x0a, 0x06, 0x28, 0x08, 0x22, 0x4e,
+      0x81, 0x54, 0x0c, 0x15, 0x03, 0x03, 0x05, 0x07, 0x09, 0x19, 0x07, 0x07,
+      0x09, 0x03, 0x0d, 0x07, 0x29, 0x80, 0xcb, 0x25, 0x0a, 0x84, 0x06,
+  };
+  auto lower = static_cast<uint16_t>(cp);
+  if (cp < 0x10000) {
+    return is_printable(lower, singletons0,
+                        sizeof(singletons0) / sizeof(*singletons0),
+                        singletons0_lower, normal0, sizeof(normal0));
+  }
+  if (cp < 0x20000) {
+    return is_printable(lower, singletons1,
+                        sizeof(singletons1) / sizeof(*singletons1),
+                        singletons1_lower, normal1, sizeof(normal1));
+  }
+  if (0x2a6de <= cp && cp < 0x2a700) return false;
+  if (0x2b735 <= cp && cp < 0x2b740) return false;
+  if (0x2b81e <= cp && cp < 0x2b820) return false;
+  if (0x2cea2 <= cp && cp < 0x2ceb0) return false;
+  if (0x2ebe1 <= cp && cp < 0x2f800) return false;
+  if (0x2fa1e <= cp && cp < 0x30000) return false;
+  if (0x3134b <= cp && cp < 0xe0100) return false;
+  if (0xe01f0 <= cp && cp < 0x110000) return false;
+  return cp < 0x110000;
+}
+
+}  // namespace detail
+
+FMT_END_NAMESPACE
+
+#endif  // FMT_FORMAT_INL_H_
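The normal0/normal1 tables above are a run-length encoding: lengths alternate between printable and non-printable runs (starting printable), and a byte with the top bit set introduces a two-byte length. A toy sketch decoding a made-up table with the same parity walk as the is_printable helper (the table here is invented for illustration, not real Unicode data):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Runs of 3 printable, 2 non-printable, 5 printable; everything past the
// table decodes as non-printable (parity after the last run).
static bool toy_printable(uint16_t x) {
  static const unsigned char normal[] = {0x03, 0x02, 0x05};
  int xsigned = x;
  bool current = true;
  for (size_t i = 0; i < sizeof(normal); ++i) {
    int v = normal[i];
    int len = (v & 0x80) != 0 ? (v & 0x7f) << 8 | normal[++i] : v;
    xsigned -= len;
    if (xsigned < 0) break;
    current = !current;
  }
  return current;
}

int main() {
  assert(toy_printable(0) && toy_printable(2));        // first run
  assert(!toy_printable(3) && !toy_printable(4));      // second run
  assert(toy_printable(5) && toy_printable(9));        // third run
  assert(!toy_printable(10) && !toy_printable(1000));  // past the table
  return 0;
}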
diff --git a/Genie/Genie/src/qualla/include/fmt/format.h b/Genie/Genie/src/qualla/include/fmt/format.h
new file mode 100644
index 0000000000000000000000000000000000000000..87a34b972ce6af4e2209e4d6cf78e8401e8f0037
--- /dev/null
+++ b/Genie/Genie/src/qualla/include/fmt/format.h
@@ -0,0 +1,4510 @@
+/*
+  Formatting library for C++
+
+  Copyright (c) 2012 - present, Victor Zverovich
+
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+  --- Optional exception to the license ---
+
+  As an exception, if, as a result of your compiling your source code, portions
+  of this Software are embedded into a machine-executable object form of such
+  source code, you may redistribute such embedded portions in such object form
+  without including the above copyright and permission notices.
+ */
+
+#ifndef FMT_FORMAT_H_
+#define FMT_FORMAT_H_
+
+#include <cmath>             // std::signbit
+#include <cstdint>           // uint32_t
+#include <cstring>           // std::memcpy
+#include <initializer_list>  // std::initializer_list
+#include <limits>            // std::numeric_limits
+#include <memory>            // std::uninitialized_copy
+#include <stdexcept>         // std::runtime_error
+#include <system_error>      // std::system_error
+
+#ifdef __cpp_lib_bit_cast
+#  include <bit>  // std::bit_cast
+#endif
+
+#include "core.h"
+
+#if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L
+#  define FMT_INLINE_VARIABLE inline
+#else
+#  define FMT_INLINE_VARIABLE
+#endif
+
+#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough)
+#  define FMT_FALLTHROUGH [[fallthrough]]
+#elif defined(__clang__)
+#  define FMT_FALLTHROUGH [[clang::fallthrough]]
+#elif FMT_GCC_VERSION >= 700 && \
+    (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520)
+#  define FMT_FALLTHROUGH [[gnu::fallthrough]]
+#else
+#  define FMT_FALLTHROUGH
+#endif
+
+#ifndef FMT_DEPRECATED
+#  if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900
+#    define FMT_DEPRECATED [[deprecated]]
+#  else
+#    if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__)
+#      define FMT_DEPRECATED __attribute__((deprecated))
+#    elif FMT_MSC_VERSION
+#      define FMT_DEPRECATED __declspec(deprecated)
+#    else
+#      define FMT_DEPRECATED /* deprecated */
+#    endif
+#  endif
+#endif
+
+#ifndef FMT_NO_UNIQUE_ADDRESS
+#  if FMT_CPLUSPLUS >= 202002L
+#    if FMT_HAS_CPP_ATTRIBUTE(no_unique_address)
+#      define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]]
+// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485)
+#    elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION
+#      define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
+#    endif
+#  endif
+#endif
+#ifndef FMT_NO_UNIQUE_ADDRESS
+#  define FMT_NO_UNIQUE_ADDRESS
+#endif
+
+#if FMT_GCC_VERSION || defined(__clang__)
+#  define FMT_VISIBILITY(value) __attribute__((visibility(value)))
+#else
+#  define FMT_VISIBILITY(value)
+#endif
+
+#ifdef __has_builtin
+#  define FMT_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define FMT_HAS_BUILTIN(x) 0
+#endif
+
+#if FMT_GCC_VERSION || FMT_CLANG_VERSION
+#  define FMT_NOINLINE __attribute__((noinline))
+#else
+#  define FMT_NOINLINE
+#endif
+
+#ifndef FMT_THROW
+#  if FMT_EXCEPTIONS
+#    if FMT_MSC_VERSION || defined(__NVCC__)
+FMT_BEGIN_NAMESPACE
+namespace detail {
+template <typename Exception> inline void do_throw(const Exception& x) {
+  // Silence unreachable code warnings in MSVC and NVCC because these
+  // are nearly impossible to fix in generic code.
+ volatile bool b = true; + if (b) throw x; +} +} // namespace detail +FMT_END_NAMESPACE +# define FMT_THROW(x) detail::do_throw(x) +# else +# define FMT_THROW(x) throw x +# endif +# else +# define FMT_THROW(x) \ + ::fmt::detail::assert_fail(__FILE__, __LINE__, (x).what()) +# endif +#endif + +#if FMT_EXCEPTIONS +# define FMT_TRY try +# define FMT_CATCH(x) catch (x) +#else +# define FMT_TRY if (true) +# define FMT_CATCH(x) if (false) +#endif + +#ifndef FMT_MAYBE_UNUSED +# if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) +# define FMT_MAYBE_UNUSED [[maybe_unused]] +# else +# define FMT_MAYBE_UNUSED +# endif +#endif + +#ifndef FMT_USE_USER_DEFINED_LITERALS +// EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ + FMT_MSC_VERSION >= 1900) && \ + (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) +# define FMT_USE_USER_DEFINED_LITERALS 1 +# else +# define FMT_USE_USER_DEFINED_LITERALS 0 +# endif +#endif + +// Defining FMT_REDUCE_INT_INSTANTIATIONS to 1, will reduce the number of +// integer formatter template instantiations to just one by only using the +// largest integer type. This results in a reduction in binary size but will +// cause a decrease in integer formatting performance. +#if !defined(FMT_REDUCE_INT_INSTANTIATIONS) +# define FMT_REDUCE_INT_INSTANTIATIONS 0 +#endif + +// __builtin_clz is broken in clang with Microsoft CodeGen: +// https://github.com/fmtlib/fmt/issues/519. +#if !FMT_MSC_VERSION +# if FMT_HAS_BUILTIN(__builtin_clz) || FMT_GCC_VERSION || FMT_ICC_VERSION +# define FMT_BUILTIN_CLZ(n) __builtin_clz(n) +# endif +# if FMT_HAS_BUILTIN(__builtin_clzll) || FMT_GCC_VERSION || FMT_ICC_VERSION +# define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) +# endif +#endif + +// __builtin_ctz is broken in Intel Compiler Classic on Windows: +// https://github.com/fmtlib/fmt/issues/2510. +#ifndef __ICL +# if FMT_HAS_BUILTIN(__builtin_ctz) || FMT_GCC_VERSION || FMT_ICC_VERSION || \ + defined(__NVCOMPILER) +# define FMT_BUILTIN_CTZ(n) __builtin_ctz(n) +# endif +# if FMT_HAS_BUILTIN(__builtin_ctzll) || FMT_GCC_VERSION || \ + FMT_ICC_VERSION || defined(__NVCOMPILER) +# define FMT_BUILTIN_CTZLL(n) __builtin_ctzll(n) +# endif +#endif + +#if FMT_MSC_VERSION +# include // _BitScanReverse[64], _BitScanForward[64], _umul128 +#endif + +// Some compilers masquerade as both MSVC and GCC-likes or otherwise support +// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the +// MSVC intrinsics if the clz and clzll builtins are not available. +#if FMT_MSC_VERSION && !defined(FMT_BUILTIN_CLZLL) && \ + !defined(FMT_BUILTIN_CTZLL) +FMT_BEGIN_NAMESPACE +namespace detail { +// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. +# if !defined(__clang__) +# pragma intrinsic(_BitScanForward) +# pragma intrinsic(_BitScanReverse) +# if defined(_WIN64) +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse64) +# endif +# endif + +inline auto clz(uint32_t x) -> int { + unsigned long r = 0; + _BitScanReverse(&r, x); + FMT_ASSERT(x != 0, ""); + // Static analysis complains about using uninitialized data + // "r", but the only way that can happen is if "x" is 0, + // which the callers guarantee to not happen. + FMT_MSC_WARNING(suppress : 6102) + return 31 ^ static_cast(r); +} +# define FMT_BUILTIN_CLZ(n) detail::clz(n) + +inline auto clzll(uint64_t x) -> int { + unsigned long r = 0; +# ifdef _WIN64 + _BitScanReverse64(&r, x); +# else + // Scan the high 32 bits. 
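// [Editorial note, not part of fmt] For r in [0, 63], 63 ^ r equals 63 - r
// (xor against an all-ones six-bit mask), so the xors in clz/clzll turn the
// bit index reported by _BitScanReverse into a leading-zero count without a
// subtraction. Quick sanity check:
static_assert((63 ^ 0) == 63 && (63 ^ 40) == 23 && (63 ^ 63) == 0,
              "63 ^ r == 63 - r for r in [0, 63]");
// The two-step 32-bit scan resumes here: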
+ if (_BitScanReverse(&r, static_cast(x >> 32))) + return 63 ^ static_cast(r + 32); + // Scan the low 32 bits. + _BitScanReverse(&r, static_cast(x)); +# endif + FMT_ASSERT(x != 0, ""); + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. + return 63 ^ static_cast(r); +} +# define FMT_BUILTIN_CLZLL(n) detail::clzll(n) + +inline auto ctz(uint32_t x) -> int { + unsigned long r = 0; + _BitScanForward(&r, x); + FMT_ASSERT(x != 0, ""); + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. + return static_cast(r); +} +# define FMT_BUILTIN_CTZ(n) detail::ctz(n) + +inline auto ctzll(uint64_t x) -> int { + unsigned long r = 0; + FMT_ASSERT(x != 0, ""); + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. +# ifdef _WIN64 + _BitScanForward64(&r, x); +# else + // Scan the low 32 bits. + if (_BitScanForward(&r, static_cast(x))) return static_cast(r); + // Scan the high 32 bits. + _BitScanForward(&r, static_cast(x >> 32)); + r += 32; +# endif + return static_cast(r); +} +# define FMT_BUILTIN_CTZLL(n) detail::ctzll(n) +} // namespace detail +FMT_END_NAMESPACE +#endif + +FMT_BEGIN_NAMESPACE + +template struct disjunction : std::false_type {}; +template struct disjunction
: P {}; +template +struct disjunction + : conditional_t> {}; + +template struct conjunction : std::true_type {}; +template struct conjunction
: P {}; +template +struct conjunction + : conditional_t, P1> {}; + +namespace detail { + +FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { + ignore_unused(condition); +#ifdef FMT_FUZZ + if (condition) throw std::runtime_error("fuzzing limit reached"); +#endif +} + +template struct string_literal { + static constexpr CharT value[sizeof...(C)] = {C...}; + constexpr operator basic_string_view() const { + return {value, sizeof...(C)}; + } +}; + +#if FMT_CPLUSPLUS < 201703L +template +constexpr CharT string_literal::value[sizeof...(C)]; +#endif + +template class formatbuf : public Streambuf { + private: + using char_type = typename Streambuf::char_type; + using streamsize = decltype(std::declval().sputn(nullptr, 0)); + using int_type = typename Streambuf::int_type; + using traits_type = typename Streambuf::traits_type; + + buffer& buffer_; + + public: + explicit formatbuf(buffer& buf) : buffer_(buf) {} + + protected: + // The put area is always empty. This makes the implementation simpler and has + // the advantage that the streambuf and the buffer are always in sync and + // sputc never writes into uninitialized memory. A disadvantage is that each + // call to sputc always results in a (virtual) call to overflow. There is no + // disadvantage here for sputn since this always results in a call to xsputn. + + auto overflow(int_type ch) -> int_type override { + if (!traits_type::eq_int_type(ch, traits_type::eof())) + buffer_.push_back(static_cast(ch)); + return ch; + } + + auto xsputn(const char_type* s, streamsize count) -> streamsize override { + buffer_.append(s, s + count); + return count; + } +}; + +// Implementation of std::bit_cast for pre-C++20. +template +FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To { +#ifdef __cpp_lib_bit_cast + if (is_constant_evaluated()) return std::bit_cast(from); +#endif + auto to = To(); + // The cast suppresses a bogus -Wclass-memaccess on GCC. + std::memcpy(static_cast(&to), &from, sizeof(to)); + return to; +} + +inline auto is_big_endian() -> bool { +#ifdef _WIN32 + return false; +#elif defined(__BIG_ENDIAN__) + return true; +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; +#else + struct bytes { + char data[sizeof(int)]; + }; + return bit_cast(1).data[0] == 0; +#endif +} + +class uint128_fallback { + private: + uint64_t lo_, hi_; + + public: + constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {} + constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {} + + constexpr uint64_t high() const noexcept { return hi_; } + constexpr uint64_t low() const noexcept { return lo_; } + + template ::value)> + constexpr explicit operator T() const { + return static_cast(lo_); + } + + friend constexpr auto operator==(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> bool { + return lhs.hi_ == rhs.hi_ && lhs.lo_ == rhs.lo_; + } + friend constexpr auto operator!=(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> bool { + return !(lhs == rhs); + } + friend constexpr auto operator>(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> bool { + return lhs.hi_ != rhs.hi_ ? 
lhs.hi_ > rhs.hi_ : lhs.lo_ > rhs.lo_; + } + friend constexpr auto operator|(const uint128_fallback& lhs, + const uint128_fallback& rhs) + -> uint128_fallback { + return {lhs.hi_ | rhs.hi_, lhs.lo_ | rhs.lo_}; + } + friend constexpr auto operator&(const uint128_fallback& lhs, + const uint128_fallback& rhs) + -> uint128_fallback { + return {lhs.hi_ & rhs.hi_, lhs.lo_ & rhs.lo_}; + } + friend constexpr auto operator~(const uint128_fallback& n) + -> uint128_fallback { + return {~n.hi_, ~n.lo_}; + } + friend auto operator+(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> uint128_fallback { + auto result = uint128_fallback(lhs); + result += rhs; + return result; + } + friend auto operator*(const uint128_fallback& lhs, uint32_t rhs) + -> uint128_fallback { + FMT_ASSERT(lhs.hi_ == 0, ""); + uint64_t hi = (lhs.lo_ >> 32) * rhs; + uint64_t lo = (lhs.lo_ & ~uint32_t()) * rhs; + uint64_t new_lo = (hi << 32) + lo; + return {(hi >> 32) + (new_lo < lo ? 1 : 0), new_lo}; + } + friend auto operator-(const uint128_fallback& lhs, uint64_t rhs) + -> uint128_fallback { + return {lhs.hi_ - (lhs.lo_ < rhs ? 1 : 0), lhs.lo_ - rhs}; + } + FMT_CONSTEXPR auto operator>>(int shift) const -> uint128_fallback { + if (shift == 64) return {0, hi_}; + if (shift > 64) return uint128_fallback(0, hi_) >> (shift - 64); + return {hi_ >> shift, (hi_ << (64 - shift)) | (lo_ >> shift)}; + } + FMT_CONSTEXPR auto operator<<(int shift) const -> uint128_fallback { + if (shift == 64) return {lo_, 0}; + if (shift > 64) return uint128_fallback(lo_, 0) << (shift - 64); + return {hi_ << shift | (lo_ >> (64 - shift)), (lo_ << shift)}; + } + FMT_CONSTEXPR auto operator>>=(int shift) -> uint128_fallback& { + return *this = *this >> shift; + } + FMT_CONSTEXPR void operator+=(uint128_fallback n) { + uint64_t new_lo = lo_ + n.lo_; + uint64_t new_hi = hi_ + n.hi_ + (new_lo < lo_ ? 1 : 0); + FMT_ASSERT(new_hi >= hi_, ""); + lo_ = new_lo; + hi_ = new_hi; + } + FMT_CONSTEXPR void operator&=(uint128_fallback n) { + lo_ &= n.lo_; + hi_ &= n.hi_; + } + + FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept { + if (is_constant_evaluated()) { + lo_ += n; + hi_ += (lo_ < n ? 1 : 0); + return *this; + } +#if FMT_HAS_BUILTIN(__builtin_addcll) && !defined(__ibmxl__) + unsigned long long carry; + lo_ = __builtin_addcll(lo_, n, 0, &carry); + hi_ += carry; +#elif FMT_HAS_BUILTIN(__builtin_ia32_addcarryx_u64) && !defined(__ibmxl__) + unsigned long long result; + auto carry = __builtin_ia32_addcarryx_u64(0, lo_, n, &result); + lo_ = result; + hi_ += carry; +#elif defined(_MSC_VER) && defined(_M_X64) + auto carry = _addcarry_u64(0, lo_, n, &lo_); + _addcarry_u64(carry, hi_, 0, &hi_); +#else + lo_ += n; + hi_ += (lo_ < n ? 1 : 0); +#endif + return *this; + } +}; + +using uint128_t = conditional_t; + +#ifdef UINTPTR_MAX +using uintptr_t = ::uintptr_t; +#else +using uintptr_t = uint128_t; +#endif + +// Returns the largest possible value for type T. Same as +// std::numeric_limits::max() but shorter and not affected by the max macro. +template constexpr auto max_value() -> T { + return (std::numeric_limits::max)(); +} +template constexpr auto num_bits() -> int { + return std::numeric_limits::digits; +} +// std::numeric_limits::digits may return 0 for 128-bit ints. +template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return 128; } + +// A heterogeneous bit_cast used for converting 96-bit long double to uint128_t +// and 128-bit pointers to uint128_fallback. 
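// [Editorial sketch, not part of fmt] The generalized bit_cast below
// reassembles the destination from 32-bit chunks, folding each chunk in with
// a shift; on little-endian hosts the most significant chunk has the highest
// index, so the loop runs backwards. A standalone model of the same idea
// (chunks_to_u64 is an invented name; assumes 32-bit unsigned and a
// little-endian host):
inline uint64_t chunks_to_u64(const unsigned (&chunk)[2]) {
  uint64_t result = 0;
  for (int i = 1; i >= 0; --i)  // most significant chunk first
    result = (result << 32) | chunk[i];
  return result;
}
// chunks_to_u64({0x89abcdefu, 0x01234567u}) == 0x0123456789abcdefull.
// fmt's endianness-aware generalization follows: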
+template sizeof(From))> +inline auto bit_cast(const From& from) -> To { + constexpr auto size = static_cast(sizeof(From) / sizeof(unsigned)); + struct data_t { + unsigned value[static_cast(size)]; + } data = bit_cast(from); + auto result = To(); + if (const_check(is_big_endian())) { + for (int i = 0; i < size; ++i) + result = (result << num_bits()) | data.value[i]; + } else { + for (int i = size - 1; i >= 0; --i) + result = (result << num_bits()) | data.value[i]; + } + return result; +} + +template +FMT_CONSTEXPR20 inline auto countl_zero_fallback(UInt n) -> int { + int lz = 0; + constexpr UInt msb_mask = static_cast(1) << (num_bits() - 1); + for (; (n & msb_mask) == 0; n <<= 1) lz++; + return lz; +} + +FMT_CONSTEXPR20 inline auto countl_zero(uint32_t n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated()) return FMT_BUILTIN_CLZ(n); +#endif + return countl_zero_fallback(n); +} + +FMT_CONSTEXPR20 inline auto countl_zero(uint64_t n) -> int { +#ifdef FMT_BUILTIN_CLZLL + if (!is_constant_evaluated()) return FMT_BUILTIN_CLZLL(n); +#endif + return countl_zero_fallback(n); +} + +FMT_INLINE void assume(bool condition) { + (void)condition; +#if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION + __builtin_assume(condition); +#elif FMT_GCC_VERSION + if (!condition) __builtin_unreachable(); +#endif +} + +// An approximation of iterator_t for pre-C++20 systems. +template +using iterator_t = decltype(std::begin(std::declval())); +template using sentinel_t = decltype(std::end(std::declval())); + +// A workaround for std::string not having mutable data() until C++17. +template +inline auto get_data(std::basic_string& s) -> Char* { + return &s[0]; +} +template +inline auto get_data(Container& c) -> typename Container::value_type* { + return c.data(); +} + +// Attempts to reserve space for n extra characters in the output range. +// Returns a pointer to the reserved range or a reference to it. +template ::value)> +#if FMT_CLANG_VERSION >= 307 && !FMT_ICC_VERSION +__attribute__((no_sanitize("undefined"))) +#endif +inline auto +reserve(std::back_insert_iterator it, size_t n) -> + typename Container::value_type* { + Container& c = get_container(it); + size_t size = c.size(); + c.resize(size + n); + return get_data(c) + size; +} + +template +inline auto reserve(buffer_appender it, size_t n) -> buffer_appender { + buffer& buf = get_container(it); + buf.try_reserve(buf.size() + n); + return it; +} + +template +constexpr auto reserve(Iterator& it, size_t) -> Iterator& { + return it; +} + +template +using reserve_iterator = + remove_reference_t(), 0))>; + +template +constexpr auto to_pointer(OutputIt, size_t) -> T* { + return nullptr; +} +template auto to_pointer(buffer_appender it, size_t n) -> T* { + buffer& buf = get_container(it); + auto size = buf.size(); + if (buf.capacity() < size + n) return nullptr; + buf.try_resize(size + n); + return buf.data() + size; +} + +template ::value)> +inline auto base_iterator(std::back_insert_iterator it, + typename Container::value_type*) + -> std::back_insert_iterator { + return it; +} + +template +constexpr auto base_iterator(Iterator, Iterator it) -> Iterator { + return it; +} + +// is spectacularly slow to compile in C++20 so use a simple fill_n +// instead (#1998). 
+template +FMT_CONSTEXPR auto fill_n(OutputIt out, Size count, const T& value) + -> OutputIt { + for (Size i = 0; i < count; ++i) *out++ = value; + return out; +} +template +FMT_CONSTEXPR20 auto fill_n(T* out, Size count, char value) -> T* { + if (is_constant_evaluated()) { + return fill_n(out, count, value); + } + std::memset(out, value, to_unsigned(count)); + return out + count; +} + +#ifdef __cpp_char8_t +using char8_type = char8_t; +#else +enum char8_type : unsigned char {}; +#endif + +template +FMT_CONSTEXPR FMT_NOINLINE auto copy_str_noinline(InputIt begin, InputIt end, + OutputIt out) -> OutputIt { + return copy_str(begin, end, out); +} + +// A public domain branchless UTF-8 decoder by Christopher Wellons: +// https://github.com/skeeto/branchless-utf8 +/* Decode the next character, c, from s, reporting errors in e. + * + * Since this is a branchless decoder, four bytes will be read from the + * buffer regardless of the actual length of the next character. This + * means the buffer _must_ have at least three bytes of zero padding + * following the end of the data stream. + * + * Errors are reported in e, which will be non-zero if the parsed + * character was somehow invalid: invalid byte sequence, non-canonical + * encoding, or a surrogate half. + * + * The function returns a pointer to the next character. When an error + * occurs, this pointer will be a guess that depends on the particular + * error, but it will always advance at least one byte. + */ +FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) + -> const char* { + constexpr const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; + constexpr const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; + constexpr const int shiftc[] = {0, 18, 12, 6, 0}; + constexpr const int shifte[] = {0, 6, 4, 2, 0}; + + int len = "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0\0\0\2\2\2\2\3\3\4" + [static_cast(*s) >> 3]; + // Compute the pointer to the next character early so that the next + // iteration can start working on the next character. Neither Clang + // nor GCC figure out this reordering on their own. + const char* next = s + len + !len; + + using uchar = unsigned char; + + // Assume a four-byte character and load four bytes. Unused bits are + // shifted out. + *c = uint32_t(uchar(s[0]) & masks[len]) << 18; + *c |= uint32_t(uchar(s[1]) & 0x3f) << 12; + *c |= uint32_t(uchar(s[2]) & 0x3f) << 6; + *c |= uint32_t(uchar(s[3]) & 0x3f) << 0; + *c >>= shiftc[len]; + + // Accumulate the various error conditions. + *e = (*c < mins[len]) << 6; // non-canonical encoding + *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half? + *e |= (*c > 0x10FFFF) << 8; // out of range? + *e |= (uchar(s[1]) & 0xc0) >> 2; + *e |= (uchar(s[2]) & 0xc0) >> 4; + *e |= uchar(s[3]) >> 6; + *e ^= 0x2a; // top two bits of each tail byte correct? + *e >>= shifte[len]; + + return next; +} + +constexpr FMT_INLINE_VARIABLE uint32_t invalid_code_point = ~uint32_t(); + +// Invokes f(cp, sv) for every code point cp in s with sv being the string view +// corresponding to the code point. cp is invalid_code_point on error. +template +FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) { + auto decode = [f](const char* buf_ptr, const char* ptr) { + auto cp = uint32_t(); + auto error = 0; + auto end = utf8_decode(buf_ptr, &cp, &error); + bool result = f(error ? invalid_code_point : cp, + string_view(ptr, error ? 1 : to_unsigned(end - buf_ptr))); + return result ? (error ? 
buf_ptr + 1 : end) : nullptr; + }; + auto p = s.data(); + const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. + if (s.size() >= block_size) { + for (auto end = p + s.size() - block_size + 1; p < end;) { + p = decode(p, p); + if (!p) return; + } + } + if (auto num_chars_left = s.data() + s.size() - p) { + char buf[2 * block_size - 1] = {}; + copy_str(p, p + num_chars_left, buf); + const char* buf_ptr = buf; + do { + auto end = decode(buf_ptr, p); + if (!end) return; + p += end - buf_ptr; + buf_ptr = end; + } while (buf_ptr - buf < num_chars_left); + } +} + +template +inline auto compute_width(basic_string_view s) -> size_t { + return s.size(); +} + +// Computes approximate display width of a UTF-8 string. +FMT_CONSTEXPR inline size_t compute_width(string_view s) { + size_t num_code_points = 0; + // It is not a lambda for compatibility with C++14. + struct count_code_points { + size_t* count; + FMT_CONSTEXPR auto operator()(uint32_t cp, string_view) const -> bool { + *count += detail::to_unsigned( + 1 + + (cp >= 0x1100 && + (cp <= 0x115f || // Hangul Jamo init. consonants + cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET + cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET + // CJK ... Yi except IDEOGRAPHIC HALF FILL SPACE: + (cp >= 0x2e80 && cp <= 0xa4cf && cp != 0x303f) || + (cp >= 0xac00 && cp <= 0xd7a3) || // Hangul Syllables + (cp >= 0xf900 && cp <= 0xfaff) || // CJK Compatibility Ideographs + (cp >= 0xfe10 && cp <= 0xfe19) || // Vertical Forms + (cp >= 0xfe30 && cp <= 0xfe6f) || // CJK Compatibility Forms + (cp >= 0xff00 && cp <= 0xff60) || // Fullwidth Forms + (cp >= 0xffe0 && cp <= 0xffe6) || // Fullwidth Forms + (cp >= 0x20000 && cp <= 0x2fffd) || // CJK + (cp >= 0x30000 && cp <= 0x3fffd) || + // Miscellaneous Symbols and Pictographs + Emoticons: + (cp >= 0x1f300 && cp <= 0x1f64f) || + // Supplemental Symbols and Pictographs: + (cp >= 0x1f900 && cp <= 0x1f9ff)))); + return true; + } + }; + // We could avoid branches by using utf8_decode directly. + for_each_codepoint(s, count_code_points{&num_code_points}); + return num_code_points; +} + +inline auto compute_width(basic_string_view s) -> size_t { + return compute_width( + string_view(reinterpret_cast(s.data()), s.size())); +} + +template +inline auto code_point_index(basic_string_view s, size_t n) -> size_t { + size_t size = s.size(); + return n < size ? n : size; +} + +// Calculates the index of the nth code point in a UTF-8 string. 
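// [Editorial sketch, not part of fmt] The implementations below rely on one
// property of UTF-8: continuation bytes always match 10xxxxxx, so a byte b
// starts a code point iff (b & 0xc0) != 0x80. A standalone counter built on
// the same test (invented name; assumes well-formed UTF-8 input):
inline size_t count_code_points_sketch(const char* s, size_t n) {
  size_t count = 0;
  for (size_t i = 0; i != n; ++i)
    if ((static_cast<unsigned char>(s[i]) & 0xc0) != 0x80) ++count;  // lead bytes only
  return count;
}
// count_code_points_sketch("a\xc3\xa9z", 4) == 3: 'a', U+00E9 (2 bytes), 'z'.
// fmt's index variant follows: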
+inline auto code_point_index(string_view s, size_t n) -> size_t { + const char* data = s.data(); + size_t num_code_points = 0; + for (size_t i = 0, size = s.size(); i != size; ++i) { + if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; + } + return s.size(); +} + +inline auto code_point_index(basic_string_view s, size_t n) + -> size_t { + return code_point_index( + string_view(reinterpret_cast(s.data()), s.size()), n); +} + +template struct is_integral : std::is_integral {}; +template <> struct is_integral : std::true_type {}; +template <> struct is_integral : std::true_type {}; + +template +using is_signed = + std::integral_constant::is_signed || + std::is_same::value>; + +template +using is_integer = + bool_constant::value && !std::is_same::value && + !std::is_same::value && + !std::is_same::value>; + +#ifndef FMT_USE_FLOAT +# define FMT_USE_FLOAT 1 +#endif +#ifndef FMT_USE_DOUBLE +# define FMT_USE_DOUBLE 1 +#endif +#ifndef FMT_USE_LONG_DOUBLE +# define FMT_USE_LONG_DOUBLE 1 +#endif + +#ifndef FMT_USE_FLOAT128 +# ifdef __clang__ +// Clang emulates GCC, so it has to appear early. +# if FMT_HAS_INCLUDE() +# define FMT_USE_FLOAT128 1 +# endif +# elif defined(__GNUC__) +// GNU C++: +# if defined(_GLIBCXX_USE_FLOAT128) && !defined(__STRICT_ANSI__) +# define FMT_USE_FLOAT128 1 +# endif +# endif +# ifndef FMT_USE_FLOAT128 +# define FMT_USE_FLOAT128 0 +# endif +#endif + +#if FMT_USE_FLOAT128 +using float128 = __float128; +#else +using float128 = void; +#endif +template using is_float128 = std::is_same; + +template +using is_floating_point = + bool_constant::value || is_float128::value>; + +template ::value> +struct is_fast_float : bool_constant::is_iec559 && + sizeof(T) <= sizeof(double)> {}; +template struct is_fast_float : std::false_type {}; + +template +using is_double_double = bool_constant::digits == 106>; + +#ifndef FMT_USE_FULL_CACHE_DRAGONBOX +# define FMT_USE_FULL_CACHE_DRAGONBOX 0 +#endif + +template +template +void buffer::append(const U* begin, const U* end) { + while (begin != end) { + auto count = to_unsigned(end - begin); + try_reserve(size_ + count); + auto free_cap = capacity_ - size_; + if (free_cap < count) count = free_cap; + std::uninitialized_copy_n(begin, count, ptr_ + size_); + size_ += count; + begin += count; + } +} + +template +struct is_locale : std::false_type {}; +template +struct is_locale> : std::true_type {}; +} // namespace detail + +FMT_BEGIN_EXPORT + +// The number of characters to store in the basic_memory_buffer object itself +// to avoid dynamic memory allocation. +enum { inline_buffer_size = 500 }; + +/** + \rst + A dynamically growing memory buffer for trivially copyable/constructible types + with the first ``SIZE`` elements stored in the object itself. + + You can use the ``memory_buffer`` type alias for ``char`` instead. + + **Example**:: + + auto out = fmt::memory_buffer(); + format_to(std::back_inserter(out), "The answer is {}.", 42); + + This will append the following output to the ``out`` object: + + .. code-block:: none + + The answer is 42. + + The output can be converted to an ``std::string`` with ``to_string(out)``. + \endrst + */ +template > +class basic_memory_buffer final : public detail::buffer { + private: + T store_[SIZE]; + + // Don't inherit from Allocator to avoid generating type_info for it. + FMT_NO_UNIQUE_ADDRESS Allocator alloc_; + + // Deallocate memory allocated by the buffer. 
+ FMT_CONSTEXPR20 void deallocate() { + T* data = this->data(); + if (data != store_) alloc_.deallocate(data, this->capacity()); + } + + protected: + FMT_CONSTEXPR20 void grow(size_t size) override { + detail::abort_fuzzing_if(size > 5000); + const size_t max_size = std::allocator_traits::max_size(alloc_); + size_t old_capacity = this->capacity(); + size_t new_capacity = old_capacity + old_capacity / 2; + if (size > new_capacity) + new_capacity = size; + else if (new_capacity > max_size) + new_capacity = size > max_size ? size : max_size; + T* old_data = this->data(); + T* new_data = + std::allocator_traits::allocate(alloc_, new_capacity); + // Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481). + detail::assume(this->size() <= new_capacity); + // The following code doesn't throw, so the raw pointer above doesn't leak. + std::uninitialized_copy_n(old_data, this->size(), new_data); + this->set(new_data, new_capacity); + // deallocate must not throw according to the standard, but even if it does, + // the buffer already uses the new storage and will deallocate it in + // destructor. + if (old_data != store_) alloc_.deallocate(old_data, old_capacity); + } + + public: + using value_type = T; + using const_reference = const T&; + + FMT_CONSTEXPR20 explicit basic_memory_buffer( + const Allocator& alloc = Allocator()) + : alloc_(alloc) { + this->set(store_, SIZE); + if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T()); + } + FMT_CONSTEXPR20 ~basic_memory_buffer() { deallocate(); } + + private: + // Move data from other to this buffer. + FMT_CONSTEXPR20 void move(basic_memory_buffer& other) { + alloc_ = std::move(other.alloc_); + T* data = other.data(); + size_t size = other.size(), capacity = other.capacity(); + if (data == other.store_) { + this->set(store_, capacity); + detail::copy_str(other.store_, other.store_ + size, store_); + } else { + this->set(data, capacity); + // Set pointer to the inline array so that delete is not called + // when deallocating. + other.set(other.store_, 0); + other.clear(); + } + this->resize(size); + } + + public: + /** + \rst + Constructs a :class:`fmt::basic_memory_buffer` object moving the content + of the other object to it. + \endrst + */ + FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept { + move(other); + } + + /** + \rst + Moves the content of the other ``basic_memory_buffer`` object to this one. + \endrst + */ + auto operator=(basic_memory_buffer&& other) noexcept -> basic_memory_buffer& { + FMT_ASSERT(this != &other, ""); + deallocate(); + move(other); + return *this; + } + + // Returns a copy of the allocator associated with this buffer. + auto get_allocator() const -> Allocator { return alloc_; } + + /** + Resizes the buffer to contain *count* elements. If T is a POD type new + elements may not be initialized. + */ + FMT_CONSTEXPR20 void resize(size_t count) { this->try_resize(count); } + + /** Increases the buffer capacity to *new_capacity*. 
*/ + void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } + + // Directly append data into the buffer + using detail::buffer::append; + template + void append(const ContiguousRange& range) { + append(range.data(), range.data() + range.size()); + } +}; + +using memory_buffer = basic_memory_buffer; + +template +struct is_contiguous> : std::true_type { +}; + +FMT_END_EXPORT +namespace detail { +FMT_API bool write_console(std::FILE* f, string_view text); +FMT_API void print(std::FILE*, string_view); +} // namespace detail + +FMT_BEGIN_EXPORT + +// Suppress a misleading warning in older versions of clang. +#if FMT_CLANG_VERSION +# pragma clang diagnostic ignored "-Wweak-vtables" +#endif + +/** An error reported from a formatting function. */ +class FMT_VISIBILITY("default") format_error : public std::runtime_error { + public: + using std::runtime_error::runtime_error; +}; + +namespace detail_exported { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +template struct fixed_string { + constexpr fixed_string(const Char (&str)[N]) { + detail::copy_str(static_cast(str), + str + N, data); + } + Char data[N] = {}; +}; +#endif + +// Converts a compile-time string to basic_string_view. +template +constexpr auto compile_string_to_view(const Char (&s)[N]) + -> basic_string_view { + // Remove trailing NUL character if needed. Won't be present if this is used + // with a raw character array (i.e. not defined as a string). + return {s, N - (std::char_traits::to_int_type(s[N - 1]) == 0 ? 1 : 0)}; +} +template +constexpr auto compile_string_to_view(detail::std_string_view s) + -> basic_string_view { + return {s.data(), s.size()}; +} +} // namespace detail_exported + +class loc_value { + private: + basic_format_arg value_; + + public: + template ::value)> + loc_value(T value) : value_(detail::make_arg(value)) {} + + template ::value)> + loc_value(T) {} + + template auto visit(Visitor&& vis) -> decltype(vis(0)) { + return visit_format_arg(vis, value_); + } +}; + +// A locale facet that formats values in UTF-8. +// It is parameterized on the locale to avoid the heavy include. +template class format_facet : public Locale::facet { + private: + std::string separator_; + std::string grouping_; + std::string decimal_point_; + + protected: + virtual auto do_put(appender out, loc_value val, + const format_specs<>& specs) const -> bool; + + public: + static FMT_API typename Locale::id id; + + explicit format_facet(Locale& loc); + explicit format_facet(string_view sep = "", + std::initializer_list g = {3}, + std::string decimal_point = ".") + : separator_(sep.data(), sep.size()), + grouping_(g.begin(), g.end()), + decimal_point_(decimal_point) {} + + auto put(appender out, loc_value val, const format_specs<>& specs) const + -> bool { + return do_put(out, val, specs); + } +}; + +namespace detail { + +// Returns true if value is negative, false otherwise. +// Same as `value < 0` but doesn't produce warnings if T is an unsigned type. +template ::value)> +constexpr auto is_negative(T value) -> bool { + return value < 0; +} +template ::value)> +constexpr auto is_negative(T) -> bool { + return false; +} + +template +FMT_CONSTEXPR auto is_supported_floating_point(T) -> bool { + if (std::is_same()) return FMT_USE_FLOAT; + if (std::is_same()) return FMT_USE_DOUBLE; + if (std::is_same()) return FMT_USE_LONG_DOUBLE; + return true; +} + +// Smallest of uint32_t, uint64_t, uint128_t that is large enough to +// represent all values of an integral type T. 
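// [Editorial sketch, not part of fmt] A self-contained model of the carrier
// selection performed below, with the 128-bit tier dropped so it compiles
// everywhere (carrier_t_sketch is an invented name; assumes the usual
// 32/64-bit type widths):
template <typename T>
using carrier_t_sketch =
    conditional_t<std::numeric_limits<T>::digits <= 32, uint32_t, uint64_t>;
static_assert(std::is_same<carrier_t_sketch<int>, uint32_t>::value,
              "int has 31 value bits");
static_assert(std::is_same<carrier_t_sketch<long long>, uint64_t>::value,
              "long long has 63 value bits");
// std::numeric_limits<T>::digits excludes the sign bit, so signed types land
// in the same tier as their unsigned counterparts. fmt's three-tier alias
// follows: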
+template +using uint32_or_64_or_128_t = + conditional_t() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS, + uint32_t, + conditional_t() <= 64, uint64_t, uint128_t>>; +template +using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; + +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ + (factor)*1000000, (factor)*10000000, (factor)*100000000, \ + (factor)*1000000000 + +// Converts value in the range [0, 100) to a string. +constexpr const char* digits2(size_t value) { + // GCC generates slightly better code when value is pointer-size. + return &"0001020304050607080910111213141516171819" + "2021222324252627282930313233343536373839" + "4041424344454647484950515253545556575859" + "6061626364656667686970717273747576777879" + "8081828384858687888990919293949596979899"[value * 2]; +} + +// Sign is a template parameter to workaround a bug in gcc 4.8. +template constexpr Char sign(Sign s) { +#if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604 + static_assert(std::is_same::value, ""); +#endif + return static_cast("\0-+ "[s]); +} + +template FMT_CONSTEXPR auto count_digits_fallback(T n) -> int { + int count = 1; + for (;;) { + // Integer division is slow so do it for a group of four digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. + if (n < 10) return count; + if (n < 100) return count + 1; + if (n < 1000) return count + 2; + if (n < 10000) return count + 3; + n /= 10000u; + count += 4; + } +} +#if FMT_USE_INT128 +FMT_CONSTEXPR inline auto count_digits(uint128_opt n) -> int { + return count_digits_fallback(n); +} +#endif + +#ifdef FMT_BUILTIN_CLZLL +// It is a separate function rather than a part of count_digits to workaround +// the lack of static constexpr in constexpr functions. +inline auto do_count_digits(uint64_t n) -> int { + // This has comparable performance to the version by Kendall Willets + // (https://github.com/fmtlib/format-benchmark/blob/master/digits10) + // but uses smaller tables. + // Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). + static constexpr uint8_t bsr2log10[] = { + 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, + 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20}; + auto t = bsr2log10[FMT_BUILTIN_CLZLL(n | 1) ^ 63]; + static constexpr const uint64_t zero_or_powers_of_10[] = { + 0, 0, FMT_POWERS_OF_10(1U), FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; + return t - (n < zero_or_powers_of_10[t]); +} +#endif + +// Returns the number of decimal digits in n. Leading zeros are not counted +// except for n == 0 in which case count_digits returns 1. +FMT_CONSTEXPR20 inline auto count_digits(uint64_t n) -> int { +#ifdef FMT_BUILTIN_CLZLL + if (!is_constant_evaluated()) { + return do_count_digits(n); + } +#endif + return count_digits_fallback(n); +} + +// Counts the number of digits in n. BITS = log2(radix). +template +FMT_CONSTEXPR auto count_digits(UInt n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated() && num_bits() == 32) + return (FMT_BUILTIN_CLZ(static_cast(n) | 1) ^ 31) / BITS + 1; +#endif + // Lambda avoids unreachable code warnings from NVHPC. 
+ return [](UInt m) { + int num_digits = 0; + do { + ++num_digits; + } while ((m >>= BITS) != 0); + return num_digits; + }(n); +} + +#ifdef FMT_BUILTIN_CLZ +// It is a separate function rather than a part of count_digits to workaround +// the lack of static constexpr in constexpr functions. +FMT_INLINE auto do_count_digits(uint32_t n) -> int { +// An optimization by Kendall Willets from https://bit.ly/3uOIQrB. +// This increments the upper 32 bits (log10(T) - 1) when >= T is added. +# define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T) + static constexpr uint64_t table[] = { + FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8 + FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64 + FMT_INC(100), FMT_INC(100), FMT_INC(100), // 512 + FMT_INC(1000), FMT_INC(1000), FMT_INC(1000), // 4096 + FMT_INC(10000), FMT_INC(10000), FMT_INC(10000), // 32k + FMT_INC(100000), FMT_INC(100000), FMT_INC(100000), // 256k + FMT_INC(1000000), FMT_INC(1000000), FMT_INC(1000000), // 2048k + FMT_INC(10000000), FMT_INC(10000000), FMT_INC(10000000), // 16M + FMT_INC(100000000), FMT_INC(100000000), FMT_INC(100000000), // 128M + FMT_INC(1000000000), FMT_INC(1000000000), FMT_INC(1000000000), // 1024M + FMT_INC(1000000000), FMT_INC(1000000000) // 4B + }; + auto inc = table[FMT_BUILTIN_CLZ(n | 1) ^ 31]; + return static_cast((n + inc) >> 32); +} +#endif + +// Optional version of count_digits for better performance on 32-bit platforms. +FMT_CONSTEXPR20 inline auto count_digits(uint32_t n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated()) { + return do_count_digits(n); + } +#endif + return count_digits_fallback(n); +} + +template constexpr auto digits10() noexcept -> int { + return std::numeric_limits::digits10; +} +template <> constexpr auto digits10() noexcept -> int { return 38; } +template <> constexpr auto digits10() noexcept -> int { return 38; } + +template struct thousands_sep_result { + std::string grouping; + Char thousands_sep; +}; + +template +FMT_API auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result; +template +inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { + auto result = thousands_sep_impl(loc); + return {result.grouping, Char(result.thousands_sep)}; +} +template <> +inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { + return thousands_sep_impl(loc); +} + +template +FMT_API auto decimal_point_impl(locale_ref loc) -> Char; +template inline auto decimal_point(locale_ref loc) -> Char { + return Char(decimal_point_impl(loc)); +} +template <> inline auto decimal_point(locale_ref loc) -> wchar_t { + return decimal_point_impl(loc); +} + +// Compares two characters for equality. +template auto equal2(const Char* lhs, const char* rhs) -> bool { + return lhs[0] == Char(rhs[0]) && lhs[1] == Char(rhs[1]); +} +inline auto equal2(const char* lhs, const char* rhs) -> bool { + return memcmp(lhs, rhs, 2) == 0; +} + +// Copies two characters from src to dst. +template +FMT_CONSTEXPR20 FMT_INLINE void copy2(Char* dst, const char* src) { + if (!is_constant_evaluated() && sizeof(Char) == sizeof(char)) { + memcpy(dst, src, 2); + return; + } + *dst++ = static_cast(*src++); + *dst = static_cast(*src); +} + +template struct format_decimal_result { + Iterator begin; + Iterator end; +}; + +// Formats a decimal unsigned integer value writing into out pointing to a +// buffer of specified size. The caller must ensure that the buffer is large +// enough. 
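// [Editorial sketch, not part of fmt] format_decimal below fills the buffer
// from the right, retiring two digits per integer division via the digits2
// table, then one final one- or two-digit step. A freestanding version of
// the same loop (to_decimal_sketch is an invented name; assumes <string>,
// which core.h already pulls in):
inline std::string to_decimal_sketch(uint64_t value) {
  char buf[20];  // uint64_t needs at most 20 decimal digits
  char* p = buf + sizeof(buf);
  while (value >= 100) {
    p -= 2;
    const char* d = digits2(static_cast<size_t>(value % 100));
    p[0] = d[0];  // one division yields two output digits
    p[1] = d[1];
    value /= 100;
  }
  if (value >= 10) {
    p -= 2;
    const char* d = digits2(static_cast<size_t>(value));
    p[0] = d[0];
    p[1] = d[1];
  } else {
    *--p = static_cast<char>('0' + value);
  }
  return std::string(p, buf + sizeof(buf));
}
// to_decimal_sketch(1844674407370955161ull) == "1844674407370955161".
// fmt's iterator-based implementation follows: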
+template +FMT_CONSTEXPR20 auto format_decimal(Char* out, UInt value, int size) + -> format_decimal_result { + FMT_ASSERT(size >= count_digits(value), "invalid digit count"); + out += size; + Char* end = out; + while (value >= 100) { + // Integer division is slow so do it for a group of two digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. + out -= 2; + copy2(out, digits2(static_cast(value % 100))); + value /= 100; + } + if (value < 10) { + *--out = static_cast('0' + value); + return {out, end}; + } + out -= 2; + copy2(out, digits2(static_cast(value))); + return {out, end}; +} + +template >::value)> +FMT_CONSTEXPR inline auto format_decimal(Iterator out, UInt value, int size) + -> format_decimal_result { + // Buffer is large enough to hold all digits (digits10 + 1). + Char buffer[digits10() + 1] = {}; + auto end = format_decimal(buffer, value, size).end; + return {out, detail::copy_str_noinline(buffer, end, out)}; +} + +template +FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits, + bool upper = false) -> Char* { + buffer += num_digits; + Char* end = buffer; + do { + const char* digits = upper ? "0123456789ABCDEF" : "0123456789abcdef"; + unsigned digit = static_cast(value & ((1 << BASE_BITS) - 1)); + *--buffer = static_cast(BASE_BITS < 4 ? static_cast('0' + digit) + : digits[digit]); + } while ((value >>= BASE_BITS) != 0); + return end; +} + +template +FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits, + bool upper = false) -> It { + if (auto ptr = to_pointer(out, to_unsigned(num_digits))) { + format_uint(ptr, value, num_digits, upper); + return out; + } + // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). + char buffer[num_bits() / BASE_BITS + 1]; + format_uint(buffer, value, num_digits, upper); + return detail::copy_str_noinline(buffer, buffer + num_digits, out); +} + +// A converter from UTF-8 to UTF-16. +class utf8_to_utf16 { + private: + basic_memory_buffer buffer_; + + public: + FMT_API explicit utf8_to_utf16(string_view s); + operator basic_string_view() const { return {&buffer_[0], size()}; } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const wchar_t* { return &buffer_[0]; } + auto str() const -> std::wstring { return {&buffer_[0], size()}; } +}; + +enum class to_utf8_error_policy { abort, replace }; + +// A converter from UTF-16/UTF-32 (host endian) to UTF-8. +template class to_utf8 { + private: + Buffer buffer_; + + public: + to_utf8() {} + explicit to_utf8(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) { + static_assert(sizeof(WChar) == 2 || sizeof(WChar) == 4, + "Expect utf16 or utf32"); + if (!convert(s, policy)) + FMT_THROW(std::runtime_error(sizeof(WChar) == 2 ? "invalid utf16" + : "invalid utf32")); + } + operator string_view() const { return string_view(&buffer_[0], size()); } + size_t size() const { return buffer_.size() - 1; } + const char* c_str() const { return &buffer_[0]; } + std::string str() const { return std::string(&buffer_[0], size()); } + + // Performs conversion returning a bool instead of throwing exception on + // conversion error. This method may still throw in case of memory allocation + // error. 
+ bool convert(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) { + if (!convert(buffer_, s, policy)) return false; + buffer_.push_back(0); + return true; + } + static bool convert( + Buffer& buf, basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) { + for (auto p = s.begin(); p != s.end(); ++p) { + uint32_t c = static_cast(*p); + if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) { + // Handle a surrogate pair. + ++p; + if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) { + if (policy == to_utf8_error_policy::abort) return false; + buf.append(string_view("\xEF\xBF\xBD")); + --p; + } else { + c = (c << 10) + static_cast(*p) - 0x35fdc00; + } + } else if (c < 0x80) { + buf.push_back(static_cast(c)); + } else if (c < 0x800) { + buf.push_back(static_cast(0xc0 | (c >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { + buf.push_back(static_cast(0xe0 | (c >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if (c >= 0x10000 && c <= 0x10ffff) { + buf.push_back(static_cast(0xf0 | (c >> 18))); + buf.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else { + return false; + } + } + return true; + } +}; + +// Computes 128-bit result of multiplication of two 64-bit unsigned integers. +inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return {static_cast(p >> 64), static_cast(p)}; +#elif defined(_MSC_VER) && defined(_M_X64) + auto hi = uint64_t(); + auto lo = _umul128(x, y, &hi); + return {hi, lo}; +#else + const uint64_t mask = static_cast(max_value()); + + uint64_t a = x >> 32; + uint64_t b = x & mask; + uint64_t c = y >> 32; + uint64_t d = y & mask; + + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + + uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + (bd & mask)}; +#endif +} + +namespace dragonbox { +// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from +// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. +inline int floor_log10_pow2(int e) noexcept { + FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); + static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); + return (e * 315653) >> 20; +} + +inline int floor_log2_pow10(int e) noexcept { + FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); + return (e * 1741647) >> 19; +} + +// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. +inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return static_cast(p >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + return __umulh(x, y); +#else + return umul128(x, y).high(); +#endif +} + +// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. 
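// [Editorial sketch, not part of fmt] On the fixed-point trick used by
// floor_log10_pow2 above: 315653 is round(2^20 * log10(2)) (log10(2) ~
// 0.30102999..., times 2^20 ~ 315652.8), so (e * 315653) >> 20 evaluates
// floor(e * log10(2)) exactly over the asserted range; the arithmetic right
// shift doubles as a floor for negative e. Spot checks (invented name):
constexpr int floor_log10_pow2_sketch(int e) { return (e * 315653) >> 20; }
static_assert(floor_log10_pow2_sketch(10) == 3, "2^10 = 1024, 4 digits");
static_assert(floor_log10_pow2_sketch(64) == 19, "2^64 ~ 1.8e19");
static_assert(floor_log10_pow2_sketch(-10) == -4, "floor(-3.01) == -4");
// Back to the multiplication helpers; umul192_upper128 follows: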
+inline uint128_fallback umul192_upper128(uint64_t x, + uint128_fallback y) noexcept { + uint128_fallback r = umul128(x, y.high()); + r += umul128_upper64(x, y.low()); + return r; +} + +FMT_API uint128_fallback get_cached_power(int k) noexcept; + +// Type-specific information that Dragonbox uses. +template struct float_info; + +template <> struct float_info { + using carrier_uint = uint32_t; + static const int exponent_bits = 8; + static const int kappa = 1; + static const int big_divisor = 100; + static const int small_divisor = 10; + static const int min_k = -31; + static const int max_k = 46; + static const int shorter_interval_tie_lower_threshold = -35; + static const int shorter_interval_tie_upper_threshold = -35; +}; + +template <> struct float_info { + using carrier_uint = uint64_t; + static const int exponent_bits = 11; + static const int kappa = 2; + static const int big_divisor = 1000; + static const int small_divisor = 100; + static const int min_k = -292; + static const int max_k = 341; + static const int shorter_interval_tie_lower_threshold = -77; + static const int shorter_interval_tie_upper_threshold = -77; +}; + +// An 80- or 128-bit floating point number. +template +struct float_info::digits == 64 || + std::numeric_limits::digits == 113 || + is_float128::value>> { + using carrier_uint = detail::uint128_t; + static const int exponent_bits = 15; +}; + +// A double-double floating point number. +template +struct float_info::value>> { + using carrier_uint = detail::uint128_t; +}; + +template struct decimal_fp { + using significand_type = typename float_info::carrier_uint; + significand_type significand; + int exponent; +}; + +template FMT_API auto to_decimal(T x) noexcept -> decimal_fp; +} // namespace dragonbox + +// Returns true iff Float has the implicit bit which is not stored. +template constexpr bool has_implicit_bit() { + // An 80-bit FP number has a 64-bit significand an no implicit bit. + return std::numeric_limits::digits != 64; +} + +// Returns the number of significand bits stored in Float. The implicit bit is +// not counted since it is not stored. +template constexpr int num_significand_bits() { + // std::numeric_limits may not support __float128. + return is_float128() ? 112 + : (std::numeric_limits::digits - + (has_implicit_bit() ? 1 : 0)); +} + +template +constexpr auto exponent_mask() -> + typename dragonbox::float_info::carrier_uint { + using float_uint = typename dragonbox::float_info::carrier_uint; + return ((float_uint(1) << dragonbox::float_info::exponent_bits) - 1) + << num_significand_bits(); +} +template constexpr auto exponent_bias() -> int { + // std::numeric_limits may not support __float128. + return is_float128() ? 16383 + : std::numeric_limits::max_exponent - 1; +} + +// Writes the exponent exp in the form "[+-]d{2,3}" to buffer. +template +FMT_CONSTEXPR auto write_exponent(int exp, It it) -> It { + FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); + if (exp < 0) { + *it++ = static_cast('-'); + exp = -exp; + } else { + *it++ = static_cast('+'); + } + if (exp >= 100) { + const char* top = digits2(to_unsigned(exp / 100)); + if (exp >= 1000) *it++ = static_cast(top[0]); + *it++ = static_cast(top[1]); + exp %= 100; + } + const char* d = digits2(to_unsigned(exp)); + *it++ = static_cast(d[0]); + *it++ = static_cast(d[1]); + return it; +} + +// A floating-point number f * pow(2, e) where F is an unsigned type. 
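// [Editorial sketch, not part of fmt] basic_fp::assign below undoes the
// IEEE-754 encoding: the trailing significand bits get the hidden leading 1
// restored, and the exponent is unbiased and shifted by the significand
// width so that n == f * 2^e exactly. A standalone decomposition for double
// (invented names; assumes IEEE-754 binary64 and ignores the
// zero/subnormal/infinity/NaN cases that assign handles):
struct fp_sketch {
  uint64_t f;
  int e;
};
inline fp_sketch decompose_sketch(double n) {
  uint64_t u;
  std::memcpy(&u, &n, sizeof(u));  // pre-C++20 bit_cast
  const uint64_t significand_mask = (uint64_t(1) << 52) - 1;
  int biased_e = static_cast<int>((u >> 52) & 0x7ff);
  uint64_t f = (u & significand_mask) | (uint64_t(1) << 52);  // hidden bit
  return {f, biased_e - 1023 - 52};
}
// decompose_sketch(1.5) yields f == 3 * 2^51 and e == -52, and indeed
// 3 * 2^51 * 2^-52 == 1.5. fmt's generic version follows: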
+template struct basic_fp { + F f; + int e; + + static constexpr const int num_significand_bits = + static_cast(sizeof(F) * num_bits()); + + constexpr basic_fp() : f(0), e(0) {} + constexpr basic_fp(uint64_t f_val, int e_val) : f(f_val), e(e_val) {} + + // Constructs fp from an IEEE754 floating-point number. + template FMT_CONSTEXPR basic_fp(Float n) { assign(n); } + + // Assigns n to this and return true iff predecessor is closer than successor. + template ::value)> + FMT_CONSTEXPR auto assign(Float n) -> bool { + static_assert(std::numeric_limits::digits <= 113, "unsupported FP"); + // Assume Float is in the format [sign][exponent][significand]. + using carrier_uint = typename dragonbox::float_info::carrier_uint; + const auto num_float_significand_bits = + detail::num_significand_bits(); + const auto implicit_bit = carrier_uint(1) << num_float_significand_bits; + const auto significand_mask = implicit_bit - 1; + auto u = bit_cast(n); + f = static_cast(u & significand_mask); + auto biased_e = static_cast((u & exponent_mask()) >> + num_float_significand_bits); + // The predecessor is closer if n is a normalized power of 2 (f == 0) + // other than the smallest normalized number (biased_e > 1). + auto is_predecessor_closer = f == 0 && biased_e > 1; + if (biased_e == 0) + biased_e = 1; // Subnormals use biased exponent 1 (min exponent). + else if (has_implicit_bit()) + f += static_cast(implicit_bit); + e = biased_e - exponent_bias() - num_float_significand_bits; + if (!has_implicit_bit()) ++e; + return is_predecessor_closer; + } + + template ::value)> + FMT_CONSTEXPR auto assign(Float n) -> bool { + static_assert(std::numeric_limits::is_iec559, "unsupported FP"); + return assign(static_cast(n)); + } +}; + +using fp = basic_fp; + +// Normalizes the value converted from double and multiplied by (1 << SHIFT). +template +FMT_CONSTEXPR basic_fp normalize(basic_fp value) { + // Handle subnormals. + const auto implicit_bit = F(1) << num_significand_bits(); + const auto shifted_implicit_bit = implicit_bit << SHIFT; + while ((value.f & shifted_implicit_bit) == 0) { + value.f <<= 1; + --value.e; + } + // Subtract 1 to account for hidden bit. + const auto offset = basic_fp::num_significand_bits - + num_significand_bits() - SHIFT - 1; + value.f <<= offset; + value.e -= offset; + return value; +} + +// Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. +FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +#if FMT_USE_INT128 + auto product = static_cast<__uint128_t>(lhs) * rhs; + auto f = static_cast(product >> 64); + return (static_cast(product) & (1ULL << 63)) != 0 ? f + 1 : f; +#else + // Multiply 32-bit parts of significands. + uint64_t mask = (1ULL << 32) - 1; + uint64_t a = lhs >> 32, b = lhs & mask; + uint64_t c = rhs >> 32, d = rhs & mask; + uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d; + // Compute mid 64-bit of result and round. + uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31); + return ac + (ad >> 32) + (bc >> 32) + (mid >> 32); +#endif +} + +FMT_CONSTEXPR inline fp operator*(fp x, fp y) { + return {multiply(x.f, y.f), x.e + y.e + 64}; +} + +template struct basic_data { + // For checking rounding thresholds. + // The kth entry is chosen to be the smallest integer such that the + // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. 
+ static constexpr uint32_t fractional_part_rounding_thresholds[8] = { + 2576980378U, // ceil(2^31 + 2^32/10^1) + 2190433321U, // ceil(2^31 + 2^32/10^2) + 2151778616U, // ceil(2^31 + 2^32/10^3) + 2147913145U, // ceil(2^31 + 2^32/10^4) + 2147526598U, // ceil(2^31 + 2^32/10^5) + 2147487943U, // ceil(2^31 + 2^32/10^6) + 2147484078U, // ceil(2^31 + 2^32/10^7) + 2147483691U // ceil(2^31 + 2^32/10^8) + }; +}; +// This is a struct rather than an alias to avoid shadowing warnings in gcc. +struct data : basic_data<> {}; + +#if FMT_CPLUSPLUS < 201703L +template +constexpr uint32_t basic_data::fractional_part_rounding_thresholds[]; +#endif + +template () == num_bits()> +using convert_float_result = + conditional_t::value || doublish, double, T>; + +template +constexpr auto convert_float(T value) -> convert_float_result { + return static_cast>(value); +} + +template +FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, + const fill_t& fill) -> OutputIt { + auto fill_size = fill.size(); + if (fill_size == 1) return detail::fill_n(it, n, fill[0]); + auto data = fill.data(); + for (size_t i = 0; i < n; ++i) + it = copy_str(data, data + fill_size, it); + return it; +} + +// Writes the output of f, padded according to format specifications in specs. +// size: output size in code units. +// width: output display width in (terminal) column positions. +template +FMT_CONSTEXPR auto write_padded(OutputIt out, const format_specs& specs, + size_t size, size_t width, F&& f) -> OutputIt { + static_assert(align == align::left || align == align::right, ""); + unsigned spec_width = to_unsigned(specs.width); + size_t padding = spec_width > width ? spec_width - width : 0; + // Shifts are encoded as string literals because static constexpr is not + // supported in constexpr functions. + auto* shifts = align == align::left ? "\x1f\x1f\x00\x01" : "\x00\x1f\x00\x01"; + size_t left_padding = padding >> shifts[specs.align]; + size_t right_padding = padding - left_padding; + auto it = reserve(out, size + padding * specs.fill.size()); + if (left_padding != 0) it = fill(it, left_padding, specs.fill); + it = f(it); + if (right_padding != 0) it = fill(it, right_padding, specs.fill); + return base_iterator(out, it); +} + +template +constexpr auto write_padded(OutputIt out, const format_specs& specs, + size_t size, F&& f) -> OutputIt { + return write_padded(out, specs, size, size, f); +} + +template +FMT_CONSTEXPR auto write_bytes(OutputIt out, string_view bytes, + const format_specs& specs) -> OutputIt { + return write_padded( + out, specs, bytes.size(), [bytes](reserve_iterator it) { + const char* data = bytes.data(); + return copy_str(data, data + bytes.size(), it); + }); +} + +template +auto write_ptr(OutputIt out, UIntPtr value, const format_specs* specs) + -> OutputIt { + int num_digits = count_digits<4>(value); + auto size = to_unsigned(num_digits) + size_t(2); + auto write = [=](reserve_iterator it) { + *it++ = static_cast('0'); + *it++ = static_cast('x'); + return format_uint<4, Char>(it, value, num_digits); + }; + return specs ? write_padded(out, *specs, size, write) + : base_iterator(out, write(reserve(out, size))); +} + +// Returns true iff the code point cp is printable. 
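// [Editorial sketch, not part of fmt] On the "\x1f\x1f\x00\x01" table in
// write_padded above: indexing it by the alignment enum and shifting the
// total padding right splits it without branches. A shift of 0 puts all
// padding on the left (right-aligned output), 31 effectively drops it
// (left-aligned), and 1 halves it (centered, extra fill on the right). The
// same split written out longhand (invented name; assumes enum order none,
// left, right, center with right as the default alignment):
inline void split_padding_sketch(size_t padding, int align, size_t& left,
                                 size_t& right) {
  const char shifts[] = {0, 31, 0, 1};  // none, left, right, center
  left = padding >> shifts[align];
  right = padding - left;
}
// split_padding_sketch(5, /*center=*/3, l, r) gives l == 2, r == 3.
// The printable-code-point check declared next is implemented in
// format-inl.h: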
+FMT_API auto is_printable(uint32_t cp) -> bool; + +inline auto needs_escape(uint32_t cp) -> bool { + return cp < 0x20 || cp == 0x7f || cp == '"' || cp == '\\' || + !is_printable(cp); +} + +template struct find_escape_result { + const Char* begin; + const Char* end; + uint32_t cp; +}; + +template +using make_unsigned_char = + typename conditional_t::value, + std::make_unsigned, + type_identity>::type; + +template +auto find_escape(const Char* begin, const Char* end) + -> find_escape_result { + for (; begin != end; ++begin) { + uint32_t cp = static_cast>(*begin); + if (const_check(sizeof(Char) == 1) && cp >= 0x80) continue; + if (needs_escape(cp)) return {begin, begin + 1, cp}; + } + return {begin, nullptr, 0}; +} + +inline auto find_escape(const char* begin, const char* end) + -> find_escape_result { + if (!is_utf8()) return find_escape(begin, end); + auto result = find_escape_result{end, nullptr, 0}; + for_each_codepoint(string_view(begin, to_unsigned(end - begin)), + [&](uint32_t cp, string_view sv) { + if (needs_escape(cp)) { + result = {sv.begin(), sv.end(), cp}; + return false; + } + return true; + }); + return result; +} + +#define FMT_STRING_IMPL(s, base, explicit) \ + [] { \ + /* Use the hidden visibility as a workaround for a GCC bug (#1973). */ \ + /* Use a macro-like name to avoid shadowing warnings. */ \ + struct FMT_VISIBILITY("hidden") FMT_COMPILE_STRING : base { \ + using char_type FMT_MAYBE_UNUSED = fmt::remove_cvref_t; \ + FMT_MAYBE_UNUSED FMT_CONSTEXPR explicit \ + operator fmt::basic_string_view() const { \ + return fmt::detail_exported::compile_string_to_view(s); \ + } \ + }; \ + return FMT_COMPILE_STRING(); \ + }() + +/** + \rst + Constructs a compile-time format string from a string literal *s*. + + **Example**:: + + // A compile-time error because 'd' is an invalid specifier for strings. 
+ std::string s = fmt::format(FMT_STRING("{:d}"), "foo"); + \endrst + */ +#define FMT_STRING(s) FMT_STRING_IMPL(s, fmt::detail::compile_string, ) + +template +auto write_codepoint(OutputIt out, char prefix, uint32_t cp) -> OutputIt { + *out++ = static_cast('\\'); + *out++ = static_cast(prefix); + Char buf[width]; + fill_n(buf, width, static_cast('0')); + format_uint<4>(buf, cp, width); + return copy_str(buf, buf + width, out); +} + +template +auto write_escaped_cp(OutputIt out, const find_escape_result& escape) + -> OutputIt { + auto c = static_cast(escape.cp); + switch (escape.cp) { + case '\n': + *out++ = static_cast('\\'); + c = static_cast('n'); + break; + case '\r': + *out++ = static_cast('\\'); + c = static_cast('r'); + break; + case '\t': + *out++ = static_cast('\\'); + c = static_cast('t'); + break; + case '"': + FMT_FALLTHROUGH; + case '\'': + FMT_FALLTHROUGH; + case '\\': + *out++ = static_cast('\\'); + break; + default: + if (escape.cp < 0x100) { + return write_codepoint<2, Char>(out, 'x', escape.cp); + } + if (escape.cp < 0x10000) { + return write_codepoint<4, Char>(out, 'u', escape.cp); + } + if (escape.cp < 0x110000) { + return write_codepoint<8, Char>(out, 'U', escape.cp); + } + for (Char escape_char : basic_string_view( + escape.begin, to_unsigned(escape.end - escape.begin))) { + out = write_codepoint<2, Char>(out, 'x', + static_cast(escape_char) & 0xFF); + } + return out; + } + *out++ = c; + return out; +} + +template +auto write_escaped_string(OutputIt out, basic_string_view str) + -> OutputIt { + *out++ = static_cast('"'); + auto begin = str.begin(), end = str.end(); + do { + auto escape = find_escape(begin, end); + out = copy_str(begin, escape.begin, out); + begin = escape.end; + if (!begin) break; + out = write_escaped_cp(out, escape); + } while (begin != end); + *out++ = static_cast('"'); + return out; +} + +template +auto write_escaped_char(OutputIt out, Char v) -> OutputIt { + *out++ = static_cast('\''); + if ((needs_escape(static_cast(v)) && v != static_cast('"')) || + v == static_cast('\'')) { + out = write_escaped_cp( + out, find_escape_result{&v, &v + 1, static_cast(v)}); + } else { + *out++ = v; + } + *out++ = static_cast('\''); + return out; +} + +template +FMT_CONSTEXPR auto write_char(OutputIt out, Char value, + const format_specs& specs) -> OutputIt { + bool is_debug = specs.type == presentation_type::debug; + return write_padded(out, specs, 1, [=](reserve_iterator it) { + if (is_debug) return write_escaped_char(it, value); + *it++ = value; + return it; + }); +} +template +FMT_CONSTEXPR auto write(OutputIt out, Char value, + const format_specs& specs, locale_ref loc = {}) + -> OutputIt { + // char is formatted as unsigned char for consistency across platforms. + using unsigned_type = + conditional_t::value, unsigned char, unsigned>; + return check_char_specs(specs) + ? write_char(out, value, specs) + : write(out, static_cast(value), specs, loc); +} + +// Data for write_int that doesn't depend on output iterator type. It is used to +// avoid template code bloat. 
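// [Editorial sketch, not part of fmt] write_int below packs the number's
// prefix ("-", "0x", ...) into a single unsigned: up to three prefix
// characters sit in the low three bytes, lowest byte emitted first, and the
// character count rides in the top byte (prefix >> 24). How prefix_append
// (defined further below) adds "0x" after an existing "-" (invented name;
// the multi-statement constexpr needs C++14):
constexpr unsigned packed_prefix_sketch() {
  unsigned prefix = 0x01000000 | '-';         // one character: '-'
  unsigned value = unsigned('x') << 8 | '0';  // the two-character "0x"
  prefix |= value << 8;                       // shift past the '-'
  prefix += 2u << 24;                         // two more characters
  return prefix;
}
static_assert(packed_prefix_sketch() >> 24 == 3, "three prefix characters");
static_assert((packed_prefix_sketch() & 0xff) == '-', "'-' is emitted first");
static_assert(((packed_prefix_sketch() >> 8) & 0xff) == '0', "then '0'");
static_assert(((packed_prefix_sketch() >> 16) & 0xff) == 'x', "then 'x'");
// The iterator-independent sizing data follows: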
+
+// Data for write_int that doesn't depend on output iterator type. It is used
+// to avoid template code bloat.
+template <typename Char> struct write_int_data {
+  size_t size;
+  size_t padding;
+
+  FMT_CONSTEXPR write_int_data(int num_digits, unsigned prefix,
+                               const format_specs<Char>& specs)
+      : size((prefix >> 24) + to_unsigned(num_digits)), padding(0) {
+    if (specs.align == align::numeric) {
+      auto width = to_unsigned(specs.width);
+      if (width > size) {
+        padding = width - size;
+        size = width;
+      }
+    } else if (specs.precision > num_digits) {
+      size = (prefix >> 24) + to_unsigned(specs.precision);
+      padding = to_unsigned(specs.precision - num_digits);
+    }
+  }
+};
+
+// Writes an integer in the format
+//   <left-padding><prefix><numeric-padding><digits><right-padding>
+// where <digits> are written by write_digits(it).
+// prefix contains chars in three lower bytes and the size in the fourth byte.
+template <typename Char, typename OutputIt, typename W>
+FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits,
+                                        unsigned prefix,
+                                        const format_specs<Char>& specs,
+                                        W write_digits) -> OutputIt {
+  // Slightly faster check for specs.width == 0 && specs.precision == -1.
+  if ((specs.width | (specs.precision + 1)) == 0) {
+    auto it = reserve(out, to_unsigned(num_digits) + (prefix >> 24));
+    if (prefix != 0) {
+      for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8)
+        *it++ = static_cast<Char>(p & 0xff);
+    }
+    return base_iterator(out, write_digits(it));
+  }
+  auto data = write_int_data<Char>(num_digits, prefix, specs);
+  return write_padded<align::right>(
+      out, specs, data.size, [=](reserve_iterator<OutputIt> it) {
+        for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8)
+          *it++ = static_cast<Char>(p & 0xff);
+        it = detail::fill_n(it, data.padding, static_cast<Char>('0'));
+        return write_digits(it);
+      });
+}
+
+template <typename Char> class digit_grouping {
+ private:
+  std::string grouping_;
+  std::basic_string<Char> thousands_sep_;
+
+  struct next_state {
+    std::string::const_iterator group;
+    int pos;
+  };
+  next_state initial_state() const { return {grouping_.begin(), 0}; }
+
+  // Returns the next digit group separator position.
+  int next(next_state& state) const {
+    if (thousands_sep_.empty()) return max_value<int>();
+    if (state.group == grouping_.end()) return state.pos += grouping_.back();
+    if (*state.group <= 0 || *state.group == max_value<char>())
+      return max_value<int>();
+    state.pos += *state.group++;
+    return state.pos;
+  }
+
+ public:
+  explicit digit_grouping(locale_ref loc, bool localized = true) {
+    if (!localized) return;
+    auto sep = thousands_sep<Char>(loc);
+    grouping_ = sep.grouping;
+    if (sep.thousands_sep) thousands_sep_.assign(1, sep.thousands_sep);
+  }
+  digit_grouping(std::string grouping, std::basic_string<Char> sep)
+      : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {}
+
+  bool has_separator() const { return !thousands_sep_.empty(); }
+
+  int count_separators(int num_digits) const {
+    int count = 0;
+    auto state = initial_state();
+    while (num_digits > next(state)) ++count;
+    return count;
+  }
+
+  // Applies grouping to digits and write the output to out.
+  template <typename Out, typename C>
+  Out apply(Out out, basic_string_view<C> digits) const {
+    auto num_digits = static_cast<int>(digits.size());
+    auto separators = basic_memory_buffer<int>();
+    separators.push_back(0);
+    auto state = initial_state();
+    while (int i = next(state)) {
+      if (i >= num_digits) break;
+      separators.push_back(i);
+    }
+    for (int i = 0, sep_index = static_cast<int>(separators.size() - 1);
+         i < num_digits; ++i) {
+      if (num_digits - i == separators[sep_index]) {
+        out = copy_str<Char>(thousands_sep_.data(),
+                             thousands_sep_.data() + thousands_sep_.size(),
+                             out);
+        --sep_index;
+      }
+      *out++ = static_cast<Char>(digits[to_unsigned(i)]);
+    }
+    return out;
+  }
+};
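+
+// NOTE (editorial sketch, not part of upstream {fmt}): digit_grouping follows
+// the POSIX grouping convention -- each byte of the grouping string is a group
+// size counted from the least significant digit, with the last size repeating.
+// For example:
+//
+//   auto g = fmt::detail::digit_grouping<char>("\3", ",");
+//   std::string out;
+//   g.apply(std::back_inserter(out), fmt::string_view("1234567"));
+//   // out == "1,234,567"; a grouping of "\3\2" would yield "12,34,567".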
+
+// Writes a decimal integer with digit grouping.
+template <typename OutputIt, typename UInt, typename Char>
+auto write_int(OutputIt out, UInt value, unsigned prefix,
+               const format_specs<Char>& specs,
+               const digit_grouping<Char>& grouping) -> OutputIt {
+  static_assert(std::is_same<uint64_or_128_t<UInt>, UInt>::value, "");
+  int num_digits = count_digits(value);
+  char digits[40];
+  format_decimal(digits, value, num_digits);
+  unsigned size = to_unsigned((prefix != 0 ? 1 : 0) + num_digits +
+                              grouping.count_separators(num_digits));
+  return write_padded<align::right>(
+      out, specs, size, size, [&](reserve_iterator<OutputIt> it) {
+        if (prefix != 0) {
+          char sign = static_cast<char>(prefix);
+          *it++ = static_cast<Char>(sign);
+        }
+        return grouping.apply(it, string_view(digits, to_unsigned(num_digits)));
+      });
+}
+
+// Writes a localized value.
+FMT_API auto write_loc(appender out, loc_value value,
+                       const format_specs<>& specs, locale_ref loc) -> bool;
+template <typename OutputIt, typename Char>
+inline auto write_loc(OutputIt, loc_value, const format_specs<Char>&,
+                      locale_ref) -> bool {
+  return false;
+}
+
+FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) {
+  prefix |= prefix != 0 ? value << 8 : value;
+  prefix += (1u + (value > 0xff ? 1 : 0)) << 24;
+}
+
+template <typename UInt> struct write_int_arg {
+  UInt abs_value;
+  unsigned prefix;
+};
+
+template <typename T>
+FMT_CONSTEXPR auto make_write_int_arg(T value, sign_t sign)
+    -> write_int_arg<uint32_or_64_or_128_t<T>> {
+  auto prefix = 0u;
+  auto abs_value = static_cast<uint32_or_64_or_128_t<T>>(value);
+  if (is_negative(value)) {
+    prefix = 0x01000000 | '-';
+    abs_value = 0 - abs_value;
+  } else {
+    constexpr const unsigned prefixes[4] = {0, 0, 0x1000000u | '+',
+                                            0x1000000u | ' '};
+    prefix = prefixes[sign];
+  }
+  return {abs_value, prefix};
+}
+
+template <typename Char = char> struct loc_writer {
+  buffer_appender<Char> out;
+  const format_specs<Char>& specs;
+  std::basic_string<Char> sep;
+  std::string grouping;
+  std::basic_string<Char> decimal_point;
+
+  template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
+  auto operator()(T value) -> bool {
+    auto arg = make_write_int_arg(value, specs.sign);
+    write_int(out, static_cast<uint64_or_128_t<T>>(arg.abs_value), arg.prefix,
+              specs, digit_grouping<Char>(grouping, sep));
+    return true;
+  }
+
+  template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
+  auto operator()(T) -> bool {
+    return false;
+  }
+};
+
+template <typename Char, typename OutputIt, typename T>
+FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, write_int_arg<T> arg,
+                                        const format_specs<Char>& specs,
+                                        locale_ref) -> OutputIt {
+  static_assert(std::is_same<T, uint32_or_64_or_128_t<T>>::value, "");
+  auto abs_value = arg.abs_value;
+  auto prefix = arg.prefix;
+  switch (specs.type) {
+  case presentation_type::none:
+  case presentation_type::dec: {
+    auto num_digits = count_digits(abs_value);
+    return write_int<Char>(
+        out, num_digits, prefix, specs, [=](reserve_iterator<OutputIt> it) {
+          return format_decimal<Char>(it, abs_value, num_digits).end;
+        });
+  }
+  case presentation_type::hex_lower:
+  case presentation_type::hex_upper: {
+    bool upper = specs.type == presentation_type::hex_upper;
+    if (specs.alt)
+      prefix_append(prefix, unsigned(upper ? 'X' : 'x') << 8 | '0');
+    int num_digits = count_digits<4>(abs_value);
+    return write_int<Char>(
+        out, num_digits, prefix, specs, [=](reserve_iterator<OutputIt> it) {
+          return format_uint<4, Char>(it, abs_value, num_digits, upper);
+        });
+  }
+  case presentation_type::bin_lower:
+  case presentation_type::bin_upper: {
+    bool upper = specs.type == presentation_type::bin_upper;
+    if (specs.alt)
+      prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0');
+    int num_digits = count_digits<1>(abs_value);
+    return write_int<Char>(out, num_digits, prefix, specs,
+                           [=](reserve_iterator<OutputIt> it) {
+                             return format_uint<1, Char>(it, abs_value,
+                                                         num_digits);
+                           });
+  }
+  case presentation_type::oct: {
+    int num_digits = count_digits<3>(abs_value);
+    // Octal prefix '0' is counted as a digit, so only add it if precision
+    // is not greater than the number of digits.
+    if (specs.alt && specs.precision <= num_digits && abs_value != 0)
+      prefix_append(prefix, '0');
+    return write_int<Char>(out, num_digits, prefix, specs,
+                           [=](reserve_iterator<OutputIt> it) {
+                             return format_uint<3, Char>(it, abs_value,
+                                                         num_digits);
+                           });
+  }
+  case presentation_type::chr:
+    return write_char(out, static_cast<Char>(abs_value), specs);
+  default:
+    throw_format_error("invalid format specifier");
+  }
+  return out;
+}
+template <typename Char, typename OutputIt, typename T>
+FMT_CONSTEXPR FMT_NOINLINE auto write_int_noinline(
+    OutputIt out, write_int_arg<T> arg, const format_specs<Char>& specs,
+    locale_ref loc) -> OutputIt {
+  return write_int(out, arg, specs, loc);
+}
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(is_integral<T>::value &&
+                        !std::is_same<T, bool>::value &&
+                        std::is_same<OutputIt, buffer_appender<Char>>::value)>
+FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value,
+                                    const format_specs<Char>& specs,
+                                    locale_ref loc) -> OutputIt {
+  if (specs.localized && write_loc(out, value, specs, loc)) return out;
+  return write_int_noinline(out, make_write_int_arg(value, specs.sign), specs,
+                            loc);
+}
+// An inlined version of write used in format string compilation.
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(is_integral<T>::value &&
+                        !std::is_same<T, bool>::value &&
+                        !std::is_same<OutputIt, buffer_appender<Char>>::value)>
+FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value,
+                                    const format_specs<Char>& specs,
+                                    locale_ref loc) -> OutputIt {
+  if (specs.localized && write_loc(out, value, specs, loc)) return out;
+  return write_int(out, make_write_int_arg(value, specs.sign), specs, loc);
+}
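+
+// NOTE (editorial sketch, not part of upstream {fmt}): write_int receives the
+// sign and base prefixes packed into one unsigned -- up to three characters in
+// the low bytes (emitted low byte first) and the character count in the top
+// byte. A worked example for "-0x" (negative, alternate hex form):
+//
+//   unsigned prefix = 0x01000000 | '-';               // "-", length 1
+//   prefix_append(prefix, unsigned('x') << 8 | '0');  // "-0x", length 3
+//   // prefix == 0x0378302d: bytes '-', '0', 'x' plus 3 in the top byte.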
+
+// An output iterator that counts the number of objects written to it and
+// discards them.
+class counting_iterator {
+ private:
+  size_t count_;
+
+ public:
+  using iterator_category = std::output_iterator_tag;
+  using difference_type = std::ptrdiff_t;
+  using pointer = void;
+  using reference = void;
+  FMT_UNCHECKED_ITERATOR(counting_iterator);
+
+  struct value_type {
+    template <typename T> FMT_CONSTEXPR void operator=(const T&) {}
+  };
+
+  FMT_CONSTEXPR counting_iterator() : count_(0) {}
+
+  FMT_CONSTEXPR size_t count() const { return count_; }
+
+  FMT_CONSTEXPR counting_iterator& operator++() {
+    ++count_;
+    return *this;
+  }
+  FMT_CONSTEXPR counting_iterator operator++(int) {
+    auto it = *this;
+    ++*this;
+    return it;
+  }
+
+  FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it,
+                                                   difference_type n) {
+    it.count_ += static_cast<size_t>(n);
+    return it;
+  }
+
+  FMT_CONSTEXPR value_type operator*() const { return {}; }
+};
+
+template <typename Char, typename OutputIt>
+FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
+                         const format_specs<Char>& specs) -> OutputIt {
+  auto data = s.data();
+  auto size = s.size();
+  if (specs.precision >= 0 && to_unsigned(specs.precision) < size)
+    size = code_point_index(s, to_unsigned(specs.precision));
+  bool is_debug = specs.type == presentation_type::debug;
+  size_t width = 0;
+  if (specs.width != 0) {
+    if (is_debug)
+      width = write_escaped_string(counting_iterator{}, s).count();
+    else
+      width = compute_width(basic_string_view<Char>(data, size));
+  }
+  return write_padded(out, specs, size, width,
+                      [=](reserve_iterator<OutputIt> it) {
+                        if (is_debug) return write_escaped_string(it, s);
+                        return copy_str<Char>(data, data + size, it);
+                      });
+}
+template <typename Char, typename OutputIt>
+FMT_CONSTEXPR auto write(OutputIt out,
+                         basic_string_view<type_identity_t<Char>> s,
+                         const format_specs<Char>& specs, locale_ref)
+    -> OutputIt {
+  return write(out, s, specs);
+}
+template <typename Char, typename OutputIt>
+FMT_CONSTEXPR auto write(OutputIt out, const Char* s,
+                         const format_specs<Char>& specs, locale_ref)
+    -> OutputIt {
+  return specs.type != presentation_type::pointer
+             ? write(out, basic_string_view<Char>(s), specs, {})
+             : write_ptr<Char>(out, bit_cast<uintptr_t>(s), &specs);
+}
+
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(is_integral<T>::value &&
+                        !std::is_same<T, bool>::value &&
+                        !std::is_same<T, Char>::value)>
+FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt {
+  auto abs_value = static_cast<uint32_or_64_or_128_t<T>>(value);
+  bool negative = is_negative(value);
+  // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer.
+  if (negative) abs_value = ~abs_value + 1;
+  int num_digits = count_digits(abs_value);
+  auto size = (negative ? 1 : 0) + static_cast<size_t>(num_digits);
+  auto it = reserve(out, size);
+  if (auto ptr = to_pointer<Char>(it, size)) {
+    if (negative) *ptr++ = static_cast<Char>('-');
+    format_decimal(ptr, abs_value, num_digits);
+    return out;
+  }
+  if (negative) *it++ = static_cast<Char>('-');
+  it = format_decimal<Char>(it, abs_value, num_digits).end;
+  return base_iterator(out, it);
+}
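+
+// NOTE (editorial sketch, not part of upstream {fmt}): in the write() overload
+// above, ~abs_value + 1 is the two's-complement negation computed entirely in
+// unsigned arithmetic. Working modulo 2^8 for illustration:
+//
+//   ~5 + 1 == 0xFA + 1 == 0xFB == 251 == 256 - 5
+//
+// which is the magnitude of -5 without the "0 - x" wraparound that an
+// unsigned-integer-overflow sanitizer would flag.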
+
+// DEPRECATED!
+template <typename Char>
+FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end,
+                               format_specs<Char>& specs) -> const Char* {
+  FMT_ASSERT(begin != end, "");
+  auto align = align::none;
+  auto p = begin + code_point_length(begin);
+  if (end - p <= 0) p = begin;
+  for (;;) {
+    switch (to_ascii(*p)) {
+    case '<':
+      align = align::left;
+      break;
+    case '>':
+      align = align::right;
+      break;
+    case '^':
+      align = align::center;
+      break;
+    }
+    if (align != align::none) {
+      if (p != begin) {
+        auto c = *begin;
+        if (c == '}') return begin;
+        if (c == '{') {
+          throw_format_error("invalid fill character '{'");
+          return begin;
+        }
+        specs.fill = {begin, to_unsigned(p - begin)};
+        begin = p + 1;
+      } else {
+        ++begin;
+      }
+      break;
+    } else if (p == begin) {
+      break;
+    }
+    p = begin;
+  }
+  specs.align = align;
+  return begin;
+}
+
+// A floating-point presentation format.
+enum class float_format : unsigned char {
+  general,  // General: exponent notation or fixed point based on magnitude.
+  exp,      // Exponent notation with the default precision of 6, e.g. 1.2e-3.
+  fixed,    // Fixed point with the default precision of 6, e.g. 0.0012.
+  hex
+};
+
+struct float_specs {
+  int precision;
+  float_format format : 8;
+  sign_t sign : 8;
+  bool upper : 1;
+  bool locale : 1;
+  bool binary32 : 1;
+  bool showpoint : 1;
+};
+
+template <typename Char, typename ErrorHandler = error_handler>
+FMT_CONSTEXPR auto parse_float_type_spec(const format_specs<Char>& specs,
+                                         ErrorHandler&& eh = {})
+    -> float_specs {
+  auto result = float_specs();
+  result.showpoint = specs.alt;
+  result.locale = specs.localized;
+  switch (specs.type) {
+  case presentation_type::none:
+    result.format = float_format::general;
+    break;
+  case presentation_type::general_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::general_lower:
+    result.format = float_format::general;
+    break;
+  case presentation_type::exp_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::exp_lower:
+    result.format = float_format::exp;
+    result.showpoint |= specs.precision != 0;
+    break;
+  case presentation_type::fixed_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::fixed_lower:
+    result.format = float_format::fixed;
+    result.showpoint |= specs.precision != 0;
+    break;
+  case presentation_type::hexfloat_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::hexfloat_lower:
+    result.format = float_format::hex;
+    break;
+  default:
+    eh.on_error("invalid format specifier");
+    break;
+  }
+  return result;
+}
+
+template <typename Char, typename OutputIt>
+FMT_CONSTEXPR20 auto write_nonfinite(OutputIt out, bool isnan,
+                                     format_specs<Char> specs,
+                                     const float_specs& fspecs) -> OutputIt {
+  auto str =
+      isnan ? (fspecs.upper ? "NAN" : "nan") : (fspecs.upper ? "INF" : "inf");
+  constexpr size_t str_size = 3;
+  auto sign = fspecs.sign;
+  auto size = str_size + (sign ? 1 : 0);
+  // Replace '0'-padding with space for non-finite values.
+  const bool is_zero_fill =
+      specs.fill.size() == 1 && *specs.fill.data() == static_cast<Char>('0');
+  if (is_zero_fill) specs.fill[0] = static_cast<Char>(' ');
+  return write_padded(out, specs, size, [=](reserve_iterator<OutputIt> it) {
+    if (sign) *it++ = detail::sign<Char>(sign);
+    return copy_str<Char>(str, str + str_size, it);
+  });
+}
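+
+// NOTE (editorial sketch, not part of upstream {fmt}): parse_float_type_spec
+// above reduces the presentation type to one of the four float_format values:
+//
+//   "{:e}" / "{:E}"  -> float_format::exp   (uppercase also sets fspecs.upper)
+//   "{:f}" / "{:F}"  -> float_format::fixed
+//   "{:a}" / "{:A}"  -> float_format::hex
+//   "{}", "{:g}"     -> float_format::general
+//
+// write_nonfinite then renders nan/inf honoring fspecs.upper and the sign,
+// but replaces '0' fill with spaces so "007nan"-style output cannot occur.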
+
+// A decimal floating-point number significand * pow(10, exp).
+struct big_decimal_fp {
+  const char* significand;
+  int significand_size;
+  int exponent;
+};
+
+constexpr auto get_significand_size(const big_decimal_fp& f) -> int {
+  return f.significand_size;
+}
+template <typename T>
+inline auto get_significand_size(const dragonbox::decimal_fp<T>& f) -> int {
+  return count_digits(f.significand);
+}
+
+template <typename Char, typename OutputIt>
+constexpr auto write_significand(OutputIt out, const char* significand,
+                                 int significand_size) -> OutputIt {
+  return copy_str<Char>(significand, significand + significand_size, out);
+}
+template <typename Char, typename OutputIt, typename UInt>
+inline auto write_significand(OutputIt out, UInt significand,
+                              int significand_size) -> OutputIt {
+  return format_decimal<Char>(out, significand, significand_size).end;
+}
+template <typename Char, typename OutputIt, typename T, typename Grouping>
+FMT_CONSTEXPR20 auto write_significand(OutputIt out, T significand,
+                                       int significand_size, int exponent,
+                                       const Grouping& grouping) -> OutputIt {
+  if (!grouping.has_separator()) {
+    out = write_significand<Char>(out, significand, significand_size);
+    return detail::fill_n(out, exponent, static_cast<Char>('0'));
+  }
+  auto buffer = memory_buffer();
+  write_significand<char>(appender(buffer), significand, significand_size);
+  detail::fill_n(appender(buffer), exponent, '0');
+  return grouping.apply(out, string_view(buffer.data(), buffer.size()));
+}
+
+template <typename Char, typename UInt,
+          FMT_ENABLE_IF(std::is_integral<UInt>::value)>
+inline auto write_significand(Char* out, UInt significand, int significand_size,
+                              int integral_size, Char decimal_point) -> Char* {
+  if (!decimal_point)
+    return format_decimal(out, significand, significand_size).end;
+  out += significand_size + 1;
+  Char* end = out;
+  int floating_size = significand_size - integral_size;
+  for (int i = floating_size / 2; i > 0; --i) {
+    out -= 2;
+    copy2(out, digits2(static_cast<size_t>(significand % 100)));
+    significand /= 100;
+  }
+  if (floating_size % 2 != 0) {
+    *--out = static_cast<Char>('0' + significand % 10);
+    significand /= 10;
+  }
+  *--out = decimal_point;
+  format_decimal(out - integral_size, significand, integral_size);
+  return end;
+}
+
+template <typename OutputIt, typename UInt, typename Char,
+          FMT_ENABLE_IF(!std::is_pointer<remove_reference_t<OutputIt>>::value)>
+inline auto write_significand(OutputIt out, UInt significand,
+                              int significand_size, int integral_size,
+                              Char decimal_point) -> OutputIt {
+  // Buffer is large enough to hold digits (digits10 + 1) and a decimal point.
+  Char buffer[digits10<UInt>() + 2];
+  auto end = write_significand(buffer, significand, significand_size,
+                               integral_size, decimal_point);
+  return detail::copy_str_noinline<Char>(buffer, end, out);
+}
+
+template <typename OutputIt, typename Char>
+FMT_CONSTEXPR auto write_significand(OutputIt out, const char* significand,
+                                     int significand_size, int integral_size,
+                                     Char decimal_point) -> OutputIt {
+  out = detail::copy_str_noinline<Char>(significand,
+                                        significand + integral_size, out);
+  if (!decimal_point) return out;
+  *out++ = decimal_point;
+  return detail::copy_str_noinline<Char>(significand + integral_size,
+                                         significand + significand_size, out);
+}
+
+template <typename OutputIt, typename Char, typename T, typename Grouping>
+FMT_CONSTEXPR20 auto write_significand(OutputIt out, T significand,
+                                       int significand_size, int integral_size,
+                                       Char decimal_point,
+                                       const Grouping& grouping) -> OutputIt {
+  if (!grouping.has_separator()) {
+    return write_significand(out, significand, significand_size, integral_size,
+                             decimal_point);
+  }
+  auto buffer = basic_memory_buffer<Char>();
+  write_significand(buffer_appender<Char>(buffer), significand,
+                    significand_size, integral_size, decimal_point);
+  grouping.apply(
+      out, basic_string_view<Char>(buffer.data(), to_unsigned(integral_size)));
+  return detail::copy_str_noinline<Char>(buffer.data() + integral_size,
+                                         buffer.end(), out);
+}
+
+template <typename OutputIt, typename DecimalFP, typename Char,
+          typename Grouping = digit_grouping<Char>>
+FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f,
+                                    const format_specs<Char>& specs,
+                                    float_specs fspecs, locale_ref loc)
+    -> OutputIt {
+  auto significand = f.significand;
+  int significand_size = get_significand_size(f);
+  const Char zero = static_cast<Char>('0');
+  auto sign = fspecs.sign;
+  size_t size = to_unsigned(significand_size) + (sign ? 1 : 0);
+  using iterator = reserve_iterator<OutputIt>;
+
+  Char decimal_point =
+      fspecs.locale ? detail::decimal_point<Char>(loc) : static_cast<Char>('.');
+
+  int output_exp = f.exponent + significand_size - 1;
+  auto use_exp_format = [=]() {
+    if (fspecs.format == float_format::exp) return true;
+    if (fspecs.format != float_format::general) return false;
+    // Use the fixed notation if the exponent is in [exp_lower, exp_upper),
+    // e.g. 0.0001 instead of 1e-04. Otherwise use the exponent notation.
+    const int exp_lower = -4, exp_upper = 16;
+    return output_exp < exp_lower ||
+           output_exp >= (fspecs.precision > 0 ? fspecs.precision : exp_upper);
+  };
+  if (use_exp_format()) {
+    int num_zeros = 0;
+    if (fspecs.showpoint) {
+      num_zeros = fspecs.precision - significand_size;
+      if (num_zeros < 0) num_zeros = 0;
+      size += to_unsigned(num_zeros);
+    } else if (significand_size == 1) {
+      decimal_point = Char();
+    }
+    auto abs_output_exp = output_exp >= 0 ? output_exp : -output_exp;
+    int exp_digits = 2;
+    if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3;
+
+    size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits);
+    char exp_char = fspecs.upper ? 'E' : 'e';
+    auto write = [=](iterator it) {
+      if (sign) *it++ = detail::sign<Char>(sign);
+      // Insert a decimal point after the first digit and add an exponent.
+      it = write_significand(it, significand, significand_size, 1,
+                             decimal_point);
+      if (num_zeros > 0) it = detail::fill_n(it, num_zeros, zero);
+      *it++ = static_cast<Char>(exp_char);
+      return write_exponent<Char>(output_exp, it);
+    };
+    return specs.width > 0
+               ? write_padded<align::right>(out, specs, size, write)
+               : base_iterator(out, write(reserve(out, size)));
+  }
+
+  int exp = f.exponent + significand_size;
+  if (f.exponent >= 0) {
+    // 1234e5 -> 123400000[.0+]
+    size += to_unsigned(f.exponent);
+    int num_zeros = fspecs.precision - exp;
+    abort_fuzzing_if(num_zeros > 5000);
+    if (fspecs.showpoint) {
+      ++size;
+      if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 0;
+      if (num_zeros > 0) size += to_unsigned(num_zeros);
+    }
+    auto grouping = Grouping(loc, fspecs.locale);
+    size += to_unsigned(grouping.count_separators(exp));
+    return write_padded<align::right>(out, specs, size, [&](iterator it) {
+      if (sign) *it++ = detail::sign<Char>(sign);
+      it = write_significand<Char>(it, significand, significand_size,
+                                   f.exponent, grouping);
+      if (!fspecs.showpoint) return it;
+      *it++ = decimal_point;
+      return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it;
+    });
+  } else if (exp > 0) {
+    // 1234e-2 -> 12.34[0+]
+    int num_zeros = fspecs.showpoint ? fspecs.precision - significand_size : 0;
+    size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0);
+    auto grouping = Grouping(loc, fspecs.locale);
+    size += to_unsigned(grouping.count_separators(exp));
+    return write_padded<align::right>(out, specs, size, [&](iterator it) {
+      if (sign) *it++ = detail::sign<Char>(sign);
+      it = write_significand(it, significand, significand_size, exp,
+                             decimal_point, grouping);
+      return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it;
+    });
+  }
+  // 1234e-6 -> 0.001234
+  int num_zeros = -exp;
+  if (significand_size == 0 && fspecs.precision >= 0 &&
+      fspecs.precision < num_zeros) {
+    num_zeros = fspecs.precision;
+  }
+  bool pointy = num_zeros != 0 || significand_size != 0 || fspecs.showpoint;
+  size += 1 + (pointy ? 1 : 0) + to_unsigned(num_zeros);
+  return write_padded<align::right>(out, specs, size, [&](iterator it) {
+    if (sign) *it++ = detail::sign<Char>(sign);
+    *it++ = zero;
+    if (!pointy) return it;
+    *it++ = decimal_point;
+    it = detail::fill_n(it, num_zeros, zero);
+    return write_significand<Char>(it, significand, significand_size);
+  });
+}
+
+template <typename Char> class fallback_digit_grouping {
+ public:
+  constexpr fallback_digit_grouping(locale_ref, bool) {}
+
+  constexpr bool has_separator() const { return false; }
+
+  constexpr int count_separators(int) const { return 0; }
+
+  template <typename Out, typename C>
+  constexpr Out apply(Out out, basic_string_view<C>) const {
+    return out;
+  }
+};
+
+template <typename OutputIt, typename DecimalFP, typename Char>
+FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f,
+                                 const format_specs<Char>& specs,
+                                 float_specs fspecs, locale_ref loc)
+    -> OutputIt {
+  if (is_constant_evaluated()) {
+    return do_write_float<OutputIt, DecimalFP, Char,
+                          fallback_digit_grouping<Char>>(out, f, specs, fspecs,
+                                                         loc);
+  } else {
+    return do_write_float(out, f, specs, fspecs, loc);
+  }
+}
+
+template <typename T> constexpr bool isnan(T value) {
+  return !(value >= value);  // std::isnan doesn't support __float128.
+}
+
+template <typename T, typename Enable = void>
+struct has_isfinite : std::false_type {};
+
+template <typename T>
+struct has_isfinite<T, enable_if_t<sizeof(std::isfinite(T())) != 0>>
+    : std::true_type {};
+
+template <typename T,
+          FMT_ENABLE_IF(std::is_floating_point<T>::value&&
+                            has_isfinite<T>::value)>
+FMT_CONSTEXPR20 bool isfinite(T value) {
+  constexpr T inf = T(std::numeric_limits<double>::infinity());
+  if (is_constant_evaluated())
+    return !detail::isnan(value) && value < inf && value > -inf;
+  return std::isfinite(value);
+}
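+
+// NOTE (editorial, not part of upstream {fmt}): detail::isnan above exploits
+// IEEE 754 ordering -- a NaN compares unordered against everything, including
+// itself, so value >= value is false exactly for NaNs:
+//
+//   detail::isnan(1.0);            // false: 1.0 >= 1.0 holds
+//   detail::isnan(std::nan(""));   // true: NaN >= NaN is false
+//
+// This keeps the check usable for types like __float128 that std::isnan may
+// not accept, which is why the isfinite overloads here rely on it.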
+template <typename T, FMT_ENABLE_IF(!has_isfinite<T>::value)>
+FMT_CONSTEXPR bool isfinite(T value) {
+  T inf = T(std::numeric_limits<double>::infinity());
+  // std::isfinite doesn't support __float128.
+  return !detail::isnan(value) && value < inf && value > -inf;
+}
+
+template <typename T, FMT_ENABLE_IF(is_floating_point<T>::value)>
+FMT_INLINE FMT_CONSTEXPR bool signbit(T value) {
+  if (is_constant_evaluated()) {
+#ifdef __cpp_if_constexpr
+    if constexpr (std::numeric_limits<double>::is_iec559) {
+      auto bits = detail::bit_cast<uint64_t>(static_cast<double>(value));
+      return (bits >> (num_bits<uint64_t>() - 1)) != 0;
+    }
+#endif
+  }
+  return std::signbit(static_cast<double>(value));
+}
+
+inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) {
+  // Adjust fixed precision by exponent because it is relative to decimal
+  // point.
+  if (exp10 > 0 && precision > max_value<int>() - exp10)
+    FMT_THROW(format_error("number is too big"));
+  precision += exp10;
+}
+
+class bigint {
+ private:
+  // A bigint is stored as an array of bigits (big digits), with bigit at index
+  // 0 being the least significant one.
+  using bigit = uint32_t;
+  using double_bigit = uint64_t;
+  enum { bigits_capacity = 32 };
+  basic_memory_buffer<bigit, bigits_capacity> bigits_;
+  int exp_;
+
+  FMT_CONSTEXPR20 bigit operator[](int index) const {
+    return bigits_[to_unsigned(index)];
+  }
+  FMT_CONSTEXPR20 bigit& operator[](int index) {
+    return bigits_[to_unsigned(index)];
+  }
+
+  static constexpr const int bigit_bits = num_bits<bigit>();
+
+  friend struct formatter<bigint>;
+
+  FMT_CONSTEXPR20 void subtract_bigits(int index, bigit other, bigit& borrow) {
+    auto result = static_cast<double_bigit>((*this)[index]) - other - borrow;
+    (*this)[index] = static_cast<bigit>(result);
+    borrow = static_cast<bigit>(result >> (bigit_bits * 2 - 1));
+  }
+
+  FMT_CONSTEXPR20 void remove_leading_zeros() {
+    int num_bigits = static_cast<int>(bigits_.size()) - 1;
+    while (num_bigits > 0 && (*this)[num_bigits] == 0) --num_bigits;
+    bigits_.resize(to_unsigned(num_bigits + 1));
+  }
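+
+  // NOTE (editorial, not part of upstream {fmt}): a bigint denotes
+  //   (sum over i of bigits_[i] * 2^(32*i)) * 2^(32*exp_),
+  // i.e. exp_ counts implicit least-significant zero bigits, so whole-bigit
+  // shifts are free:
+  //
+  //   bigint b(1);
+  //   b <<= 64;  // bigits_ == {1}, exp_ == 2; no digits are moved.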
+
+  // Computes *this -= other assuming aligned bigints and *this >= other.
+  FMT_CONSTEXPR20 void subtract_aligned(const bigint& other) {
+    FMT_ASSERT(other.exp_ >= exp_, "unaligned bigints");
+    FMT_ASSERT(compare(*this, other) >= 0, "");
+    bigit borrow = 0;
+    int i = other.exp_ - exp_;
+    for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j)
+      subtract_bigits(i, other.bigits_[j], borrow);
+    while (borrow > 0) subtract_bigits(i, 0, borrow);
+    remove_leading_zeros();
+  }
+
+  FMT_CONSTEXPR20 void multiply(uint32_t value) {
+    const double_bigit wide_value = value;
+    bigit carry = 0;
+    for (size_t i = 0, n = bigits_.size(); i < n; ++i) {
+      double_bigit result = bigits_[i] * wide_value + carry;
+      bigits_[i] = static_cast<bigit>(result);
+      carry = static_cast<bigit>(result >> bigit_bits);
+    }
+    if (carry != 0) bigits_.push_back(carry);
+  }
+
+  template <typename UInt, FMT_ENABLE_IF(std::is_same<UInt, uint64_t>::value ||
+                                         std::is_same<UInt, uint128_t>::value)>
+  FMT_CONSTEXPR20 void multiply(UInt value) {
+    using half_uint =
+        conditional_t<std::is_same<UInt, uint128_t>::value, uint64_t, uint32_t>;
+    const int shift = num_bits<half_uint>() - bigit_bits;
+    const UInt lower = static_cast<half_uint>(value);
+    const UInt upper = value >> num_bits<half_uint>();
+    UInt carry = 0;
+    for (size_t i = 0, n = bigits_.size(); i < n; ++i) {
+      UInt result = lower * bigits_[i] + static_cast<bigit>(carry);
+      carry = (upper * bigits_[i] << shift) + (result >> bigit_bits) +
+              (carry >> bigit_bits);
+      bigits_[i] = static_cast<bigit>(result);
+    }
+    while (carry != 0) {
+      bigits_.push_back(static_cast<bigit>(carry));
+      carry >>= bigit_bits;
+    }
+  }
+
+  template <typename UInt, FMT_ENABLE_IF(std::is_same<UInt, uint64_t>::value ||
+                                         std::is_same<UInt, uint128_t>::value)>
+  FMT_CONSTEXPR20 void assign(UInt n) {
+    size_t num_bigits = 0;
+    do {
+      bigits_[num_bigits++] = static_cast<bigit>(n);
+      n >>= bigit_bits;
+    } while (n != 0);
+    bigits_.resize(num_bigits);
+    exp_ = 0;
+  }
+
+ public:
+  FMT_CONSTEXPR20 bigint() : exp_(0) {}
+  explicit bigint(uint64_t n) { assign(n); }
+
+  bigint(const bigint&) = delete;
+  void operator=(const bigint&) = delete;
+
+  FMT_CONSTEXPR20 void assign(const bigint& other) {
+    auto size = other.bigits_.size();
+    bigits_.resize(size);
+    auto data = other.bigits_.data();
+    copy_str<bigit>(data, data + size, bigits_.data());
+    exp_ = other.exp_;
+  }
+
+  template <typename Int> FMT_CONSTEXPR20 void operator=(Int n) {
+    FMT_ASSERT(n > 0, "");
+    assign(uint64_or_128_t<Int>(n));
+  }
+
+  FMT_CONSTEXPR20 int num_bigits() const {
+    return static_cast<int>(bigits_.size()) + exp_;
+  }
+
+  FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) {
+    FMT_ASSERT(shift >= 0, "");
+    exp_ += shift / bigit_bits;
+    shift %= bigit_bits;
+    if (shift == 0) return *this;
+    bigit carry = 0;
+    for (size_t i = 0, n = bigits_.size(); i < n; ++i) {
+      bigit c = bigits_[i] >> (bigit_bits - shift);
+      bigits_[i] = (bigits_[i] << shift) + carry;
+      carry = c;
+    }
+    if (carry != 0) bigits_.push_back(carry);
+    return *this;
+  }
+
+  template <typename Int> FMT_CONSTEXPR20 bigint& operator*=(Int value) {
+    FMT_ASSERT(value > 0, "");
+    multiply(uint32_or_64_or_128_t<Int>(value));
+    return *this;
+  }
+
+  friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) {
+    int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits();
+    if (num_lhs_bigits != num_rhs_bigits)
+      return num_lhs_bigits > num_rhs_bigits ? 1 : -1;
+    int i = static_cast<int>(lhs.bigits_.size()) - 1;
+    int j = static_cast<int>(rhs.bigits_.size()) - 1;
+    int end = i - j;
+    if (end < 0) end = 0;
+    for (; i >= end; --i, --j) {
+      bigit lhs_bigit = lhs[i], rhs_bigit = rhs[j];
+      if (lhs_bigit != rhs_bigit) return lhs_bigit > rhs_bigit ? 1 : -1;
+    }
+    if (i != j) return i > j ? 1 : -1;
+    return 0;
+  }
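+
+  // NOTE (editorial, not part of upstream {fmt}): compare() above first ranks
+  // by num_bigits(), which includes the implicit exp_ zero bigits; e.g. with
+  //
+  //   bigint a(1); a <<= 32;  // bigits {1}, exp_ 1, num_bigits() == 2
+  //   bigint b(2);            // bigits {2}, exp_ 0, num_bigits() == 1
+  //
+  // compare(a, b) returns 1 from the length test alone, without inspecting
+  // any bigit values.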
+
+  // Returns compare(lhs1 + lhs2, rhs).
+  friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2,
+                                         const bigint& rhs) {
+    auto minimum = [](int a, int b) { return a < b ? a : b; };
+    auto maximum = [](int a, int b) { return a > b ? a : b; };
+    int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits());
+    int num_rhs_bigits = rhs.num_bigits();
+    if (max_lhs_bigits + 1 < num_rhs_bigits) return -1;
+    if (max_lhs_bigits > num_rhs_bigits) return 1;
+    auto get_bigit = [](const bigint& n, int i) -> bigit {
+      return i >= n.exp_ && i < n.num_bigits() ? n[i - n.exp_] : 0;
+    };
+    double_bigit borrow = 0;
+    int min_exp = minimum(minimum(lhs1.exp_, lhs2.exp_), rhs.exp_);
+    for (int i = num_rhs_bigits - 1; i >= min_exp; --i) {
+      double_bigit sum =
+          static_cast<double_bigit>(get_bigit(lhs1, i)) + get_bigit(lhs2, i);
+      bigit rhs_bigit = get_bigit(rhs, i);
+      if (sum > rhs_bigit + borrow) return 1;
+      borrow = rhs_bigit + borrow - sum;
+      if (borrow > 1) return -1;
+      borrow <<= bigit_bits;
+    }
+    return borrow != 0 ? -1 : 0;
+  }
+
+  // Assigns pow(10, exp) to this bigint.
+  FMT_CONSTEXPR20 void assign_pow10(int exp) {
+    FMT_ASSERT(exp >= 0, "");
+    if (exp == 0) return *this = 1;
+    // Find the top bit.
+    int bitmask = 1;
+    while (exp >= bitmask) bitmask <<= 1;
+    bitmask >>= 1;
+    // pow(10, exp) = pow(5, exp) * pow(2, exp). First compute pow(5, exp) by
+    // repeated squaring and multiplication.
+    *this = 5;
+    bitmask >>= 1;
+    while (bitmask != 0) {
+      square();
+      if ((exp & bitmask) != 0) *this *= 5;
+      bitmask >>= 1;
+    }
+    *this <<= exp;  // Multiply by pow(2, exp) by shifting.
+  }
+
+  FMT_CONSTEXPR20 void square() {
+    int num_bigits = static_cast<int>(bigits_.size());
+    int num_result_bigits = 2 * num_bigits;
+    basic_memory_buffer<bigit, bigits_capacity> n(std::move(bigits_));
+    bigits_.resize(to_unsigned(num_result_bigits));
+    auto sum = uint128_t();
+    for (int bigit_index = 0; bigit_index < num_bigits; ++bigit_index) {
+      // Compute bigit at position bigit_index of the result by adding
+      // cross-product terms n[i] * n[j] such that i + j == bigit_index.
+      for (int i = 0, j = bigit_index; j >= 0; ++i, --j) {
+        // Most terms are multiplied twice which can be optimized in the future.
+        sum += static_cast<double_bigit>(n[i]) * n[j];
+      }
+      (*this)[bigit_index] = static_cast<bigit>(sum);
+      sum >>= num_bits<bigit>();  // Compute the carry.
+    }
+    // Do the same for the top half.
+    for (int bigit_index = num_bigits; bigit_index < num_result_bigits;
+         ++bigit_index) {
+      for (int j = num_bigits - 1, i = bigit_index - j; i < num_bigits;)
+        sum += static_cast<double_bigit>(n[i++]) * n[j--];
+      (*this)[bigit_index] = static_cast<bigit>(sum);
+      sum >>= num_bits<bigit>();
+    }
+    remove_leading_zeros();
+    exp_ *= 2;
+  }
+
+  // If this bigint has a bigger exponent than other, adds trailing zero to make
+  // exponents equal. This simplifies some operations such as subtraction.
+  FMT_CONSTEXPR20 void align(const bigint& other) {
+    int exp_difference = exp_ - other.exp_;
+    if (exp_difference <= 0) return;
+    int num_bigits = static_cast<int>(bigits_.size());
+    bigits_.resize(to_unsigned(num_bigits + exp_difference));
+    for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j)
+      bigits_[j] = bigits_[i];
+    std::uninitialized_fill_n(bigits_.data(), exp_difference, 0);
+    exp_ -= exp_difference;
+  }
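+
+  // NOTE (editorial worked example, not part of upstream {fmt}): assign_pow10
+  // uses the factorization pow(10, e) = pow(5, e) * pow(2, e) -- pow(5, e) by
+  // binary exponentiation, pow(2, e) as a final shift. For e = 10 (binary
+  // 1010), the loop performs:
+  //
+  //   *this = 5;             // top bit
+  //   square();              // 5^2   (bit with value 4 is 0)
+  //   square(); *this *= 5;  // 5^5   (bit with value 2 is 1)
+  //   square();              // 5^10  (bit with value 1 is 0)
+  //   *this <<= 10;          // multiply by 2^10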
+
+  // Divides this bignum by divisor, assigning the remainder to this and
+  // returning the quotient.
+  FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) {
+    FMT_ASSERT(this != &divisor, "");
+    if (compare(*this, divisor) < 0) return 0;
+    FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, "");
+    align(divisor);
+    int quotient = 0;
+    do {
+      subtract_aligned(divisor);
+      ++quotient;
+    } while (compare(*this, divisor) >= 0);
+    return quotient;
+  }
+};
+
+// format_dragon flags.
+enum dragon {
+  predecessor_closer = 1,
+  fixup = 2,  // Run fixup to correct exp10 which can be off by one.
+  fixed = 4,
+};
+
+// Formats a floating-point number using a variation of the Fixed-Precision
+// Positive Floating-Point Printout ((FPP)^2) algorithm by Steele & White:
+// https://fmt.dev/papers/p372-steele.pdf.
+FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
+                                          unsigned flags, int num_digits,
+                                          buffer<char>& buf, int& exp10) {
+  bigint numerator;    // 2 * R in (FPP)^2.
+  bigint denominator;  // 2 * S in (FPP)^2.
+  // lower and upper are differences between value and corresponding boundaries.
+  bigint lower;             // (M^- in (FPP)^2).
+  bigint upper_store;       // upper's value if different from lower.
+  bigint* upper = nullptr;  // (M^+ in (FPP)^2).
+  // Shift numerator and denominator by an extra bit or two (if lower boundary
+  // is closer) to make lower and upper integers. This eliminates multiplication
+  // by 2 during later computations.
+  bool is_predecessor_closer = (flags & dragon::predecessor_closer) != 0;
+  int shift = is_predecessor_closer ? 2 : 1;
+  if (value.e >= 0) {
+    numerator = value.f;
+    numerator <<= value.e + shift;
+    lower = 1;
+    lower <<= value.e;
+    if (is_predecessor_closer) {
+      upper_store = 1;
+      upper_store <<= value.e + 1;
+      upper = &upper_store;
+    }
+    denominator.assign_pow10(exp10);
+    denominator <<= shift;
+  } else if (exp10 < 0) {
+    numerator.assign_pow10(-exp10);
+    lower.assign(numerator);
+    if (is_predecessor_closer) {
+      upper_store.assign(numerator);
+      upper_store <<= 1;
+      upper = &upper_store;
+    }
+    numerator *= value.f;
+    numerator <<= shift;
+    denominator = 1;
+    denominator <<= shift - value.e;
+  } else {
+    numerator = value.f;
+    numerator <<= shift;
+    denominator.assign_pow10(exp10);
+    denominator <<= shift - value.e;
+    lower = 1;
+    if (is_predecessor_closer) {
+      upper_store = 1ULL << 1;
+      upper = &upper_store;
+    }
+  }
+  int even = static_cast<int>((value.f & 1) == 0);
+  if (!upper) upper = &lower;
+  bool shortest = num_digits < 0;
+  if ((flags & dragon::fixup) != 0) {
+    if (add_compare(numerator, *upper, denominator) + even <= 0) {
+      --exp10;
+      numerator *= 10;
+      if (num_digits < 0) {
+        lower *= 10;
+        if (upper != &lower) *upper *= 10;
+      }
+    }
+    if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1);
+  }
+  // Invariant: value == (numerator / denominator) * pow(10, exp10).
+  if (shortest) {
+    // Generate the shortest representation.
+    num_digits = 0;
+    char* data = buf.data();
+    for (;;) {
+      int digit = numerator.divmod_assign(denominator);
+      bool low = compare(numerator, lower) - even < 0;  // numerator <[=] lower.
+      // numerator + upper >[=] pow10:
+      bool high = add_compare(numerator, *upper, denominator) + even > 0;
+      data[num_digits++] = static_cast<char>('0' + digit);
+      if (low || high) {
+        if (!low) {
+          ++data[num_digits - 1];
+        } else if (high) {
+          int result = add_compare(numerator, numerator, denominator);
+          // Round half to even.
+          if (result > 0 || (result == 0 && (digit % 2) != 0))
+            ++data[num_digits - 1];
+        }
+        buf.try_resize(to_unsigned(num_digits));
+        exp10 -= num_digits - 1;
+        return;
+      }
+      numerator *= 10;
+      lower *= 10;
+      if (upper != &lower) *upper *= 10;
+    }
+  }
+  // Generate the given number of digits.
+  exp10 -= num_digits - 1;
+  if (num_digits <= 0) {
+    denominator *= 10;
+    auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0';
+    buf.push_back(digit);
+    return;
+  }
+  buf.try_resize(to_unsigned(num_digits));
+  for (int i = 0; i < num_digits - 1; ++i) {
+    int digit = numerator.divmod_assign(denominator);
+    buf[i] = static_cast<char>('0' + digit);
+    numerator *= 10;
+  }
+  int digit = numerator.divmod_assign(denominator);
+  auto result = add_compare(numerator, numerator, denominator);
+  if (result > 0 || (result == 0 && (digit % 2) != 0)) {
+    if (digit == 9) {
+      const auto overflow = '0' + 10;
+      buf[num_digits - 1] = overflow;
+      // Propagate the carry.
+      for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) {
+        buf[i] = '0';
+        ++buf[i - 1];
+      }
+      if (buf[0] == overflow) {
+        buf[0] = '1';
+        if ((flags & dragon::fixed) != 0)
+          buf.push_back('0');
+        else
+          ++exp10;
+      }
+      return;
+    }
+    ++digit;
+  }
+  buf[num_digits - 1] = static_cast<char>('0' + digit);
+}
+
+// Formats a floating-point number using the hexfloat format.
+template <typename Float, FMT_ENABLE_IF(!is_double_double<Float>::value)>
+FMT_CONSTEXPR20 void format_hexfloat(Float value, int precision,
+                                     float_specs specs, buffer<char>& buf) {
+  // float is passed as double to reduce the number of instantiations and to
+  // simplify implementation.
+  static_assert(!std::is_same<Float, float>::value, "");
+
+  using info = dragonbox::float_info<Float>;
+
+  // Assume Float is in the format [sign][exponent][significand].
+  using carrier_uint = typename info::carrier_uint;
+
+  constexpr auto num_float_significand_bits =
+      detail::num_significand_bits<Float>();
+
+  basic_fp<carrier_uint> f(value);
+  f.e += num_float_significand_bits;
+  if (!has_implicit_bit<Float>()) --f.e;
+
+  constexpr auto num_fraction_bits =
+      num_float_significand_bits + (has_implicit_bit<Float>() ? 1 : 0);
+  constexpr auto num_xdigits = (num_fraction_bits + 3) / 4;
+
+  constexpr auto leading_shift = ((num_xdigits - 1) * 4);
+  const auto leading_mask = carrier_uint(0xF) << leading_shift;
+  const auto leading_xdigit =
+      static_cast<uint32_t>((f.f & leading_mask) >> leading_shift);
+  if (leading_xdigit > 1) f.e -= (32 - countl_zero(leading_xdigit) - 1);
+
+  int print_xdigits = num_xdigits - 1;
+  if (precision >= 0 && print_xdigits > precision) {
+    const int shift = ((print_xdigits - precision - 1) * 4);
+    const auto mask = carrier_uint(0xF) << shift;
+    const auto v = static_cast<uint32_t>((f.f & mask) >> shift);
+
+    if (v >= 8) {
+      const auto inc = carrier_uint(1) << (shift + 4);
+      f.f += inc;
+      f.f &= ~(inc - 1);
+    }
+
+    // Check long double overflow
+    if (!has_implicit_bit<Float>()) {
+      const auto implicit_bit = carrier_uint(1) << num_float_significand_bits;
+      if ((f.f & implicit_bit) == implicit_bit) {
+        f.f >>= 4;
+        f.e += 4;
+      }
+    }
+
+    print_xdigits = precision;
+  }
+
+  char xdigits[num_bits<carrier_uint>() / 4];
+  detail::fill_n(xdigits, sizeof(xdigits), '0');
+  format_uint<4>(xdigits, f.f, num_xdigits, specs.upper);
+
+  // Remove zero tail
+  while (print_xdigits > 0 && xdigits[print_xdigits] == '0') --print_xdigits;
+
+  buf.push_back('0');
+  buf.push_back(specs.upper ? 'X' : 'x');
+  buf.push_back(xdigits[0]);
+  if (specs.showpoint || print_xdigits > 0 || print_xdigits < precision)
+    buf.push_back('.');
+  buf.append(xdigits + 1, xdigits + 1 + print_xdigits);
+  for (; print_xdigits < precision; ++print_xdigits) buf.push_back('0');
+
+  buf.push_back(specs.upper ? 'P' : 'p');
+
+  uint32_t abs_e;
+  if (f.e < 0) {
+    buf.push_back('-');
+    abs_e = static_cast<uint32_t>(-f.e);
+  } else {
+    buf.push_back('+');
+    abs_e = static_cast<uint32_t>(f.e);
+  }
+  format_decimal<char>(appender(buf), abs_e, detail::count_digits(abs_e));
+}
+
+template <typename Float, FMT_ENABLE_IF(is_double_double<Float>::value)>
+FMT_CONSTEXPR20 void format_hexfloat(Float value, int precision,
+                                     float_specs specs, buffer<char>& buf) {
+  format_hexfloat(static_cast<double>(value), precision, specs, buf);
+}
+
+template <typename Float>
+FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
+                                  buffer<char>& buf) -> int {
+  // float is passed as double to reduce the number of instantiations.
+  static_assert(!std::is_same<Float, float>::value, "");
+  FMT_ASSERT(value >= 0, "value is negative");
+  auto converted_value = convert_float(value);
+
+  const bool fixed = specs.format == float_format::fixed;
+  if (value <= 0) {  // <= instead of == to silence a warning.
+    if (precision <= 0 || !fixed) {
+      buf.push_back('0');
+      return 0;
+    }
+    buf.try_resize(to_unsigned(precision));
+    fill_n(buf.data(), precision, '0');
+    return -precision;
+  }
+
+  int exp = 0;
+  bool use_dragon = true;
+  unsigned dragon_flags = 0;
+  if (!is_fast_float<Float>() || is_constant_evaluated()) {
+    const auto inv_log2_10 = 0.3010299956639812;  // 1 / log2(10)
+    using info = dragonbox::float_info<decltype(converted_value)>;
+    const auto f = basic_fp<typename info::carrier_uint>(converted_value);
+    // Compute exp, an approximate power of 10, such that
+    // 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1).
+    // This is based on log10(value) == log2(value) / log2(10) and approximation
+    // of log2(value) by e + num_fraction_bits idea from double-conversion.
+    auto e = (f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10;
+    exp = static_cast<int>(e);
+    if (e > exp) ++exp;  // Compute ceil.
+    dragon_flags = dragon::fixup;
+  } else if (precision < 0) {
+    // Use Dragonbox for the shortest format.
+    if (specs.binary32) {
+      auto dec = dragonbox::to_decimal(static_cast<float>(value));
+      write<char>(buffer_appender<char>(buf), dec.significand);
+      return dec.exponent;
+    }
+    auto dec = dragonbox::to_decimal(static_cast<double>(value));
+    write<char>(buffer_appender<char>(buf), dec.significand);
+    return dec.exponent;
+  } else {
+    // Extract significand bits and exponent bits.
+    using info = dragonbox::float_info<double>;
+    auto br = bit_cast<uint64_t>(static_cast<double>(value));
+
+    const uint64_t significand_mask =
+        (static_cast<uint64_t>(1) << num_significand_bits<double>()) - 1;
+    uint64_t significand = (br & significand_mask);
+    int exponent = static_cast<int>((br & exponent_mask<double>()) >>
+                                    num_significand_bits<double>());
+
+    if (exponent != 0) {  // Check if normal.
+      exponent -= exponent_bias<double>() + num_significand_bits<double>();
+      significand |=
+          (static_cast<uint64_t>(1) << num_significand_bits<double>());
+      significand <<= 1;
+    } else {
+      // Normalize subnormal inputs.
+      FMT_ASSERT(significand != 0, "zeros should not appear here");
+      int shift = countl_zero(significand);
+      FMT_ASSERT(shift >= num_bits<uint64_t>() - num_significand_bits<double>(),
+                 "");
+      shift -= (num_bits<uint64_t>() - num_significand_bits<double>() - 2);
+      exponent = (std::numeric_limits<double>::min_exponent -
+                  num_significand_bits<double>()) -
+                 shift;
+      significand <<= shift;
+    }
+
+    // Compute the first several nonzero decimal significand digits.
+    // We call the number we get the first segment.
+    const int k = info::kappa - dragonbox::floor_log10_pow2(exponent);
+    exp = -k;
+    const int beta = exponent + dragonbox::floor_log2_pow10(k);
+    uint64_t first_segment;
+    bool has_more_segments;
+    int digits_in_the_first_segment;
+    {
+      const auto r = dragonbox::umul192_upper128(
+          significand << beta, dragonbox::get_cached_power(k));
+      first_segment = r.high();
+      has_more_segments = r.low() != 0;
+
+      // The first segment can have 18 ~ 19 digits.
+      if (first_segment >= 1000000000000000000ULL) {
+        digits_in_the_first_segment = 19;
+      } else {
+        // When it is of 18-digits, we align it to 19-digits by adding a bogus
+        // zero at the end.
+        digits_in_the_first_segment = 18;
+        first_segment *= 10;
+      }
+    }
+
+    // Compute the actual number of decimal digits to print.
+    if (fixed) adjust_precision(precision, exp + digits_in_the_first_segment);
+
+    // Use Dragon4 only when there might be not enough digits in the first
+    // segment.
+    if (digits_in_the_first_segment > precision) {
+      use_dragon = false;
+
+      if (precision <= 0) {
+        exp += digits_in_the_first_segment;
+
+        if (precision < 0) {
+          // Nothing to do, since all we have are just leading zeros.
+          buf.try_resize(0);
+        } else {
+          // We may need to round-up.
+          buf.try_resize(1);
+          if ((first_segment | static_cast<uint64_t>(has_more_segments)) >
+              5000000000000000000ULL) {
+            buf[0] = '1';
+          } else {
+            buf[0] = '0';
+          }
+        }
+      }  // precision <= 0
+      else {
+        exp += digits_in_the_first_segment - precision;
+
+        // When precision > 0, we divide the first segment into three
+        // subsegments, each with 9, 9, and 0 ~ 1 digits so that each fits
+        // in 32-bits which usually allows faster calculation than in
+        // 64-bits. Since some compiler (e.g. MSVC) doesn't know how to optimize
+        // division-by-constant for large 64-bit divisors, we do it here
+        // manually. The magic number 7922816251426433760 below is equal to
+        // ceil(2^(64+32) / 10^10).
+        const uint32_t first_subsegment = static_cast<uint32_t>(
+            dragonbox::umul128_upper64(first_segment, 7922816251426433760ULL) >>
+            32);
+        const uint64_t second_third_subsegments =
+            first_segment - first_subsegment * 10000000000ULL;
+
+        uint64_t prod;
+        uint32_t digits;
+        bool should_round_up;
+        int number_of_digits_to_print = precision > 9 ? 9 : precision;
+
+        // Print a 9-digits subsegment, either the first or the second.
+        auto print_subsegment = [&](uint32_t subsegment, char* buffer) {
+          int number_of_digits_printed = 0;
+
+          // If we want to print an odd number of digits from the subsegment,
+          if ((number_of_digits_to_print & 1) != 0) {
+            // Convert to 64-bit fixed-point fractional form with 1-digit
+            // integer part. The magic number 720575941 is a good enough
+            // approximation of 2^(32 + 24) / 10^8; see
+            // https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case
+            // for details.
+            prod = ((subsegment * static_cast<uint64_t>(720575941)) >> 24) + 1;
+            digits = static_cast<uint32_t>(prod >> 32);
+            *buffer = static_cast<char>('0' + digits);
+            number_of_digits_printed++;
+          }
+          // If we want to print an even number of digits from the
+          // first_subsegment,
+          else {
+            // Convert to 64-bit fixed-point fractional form with 2-digits
+            // integer part. The magic number 450359963 is a good enough
+            // approximation of 2^(32 + 20) / 10^7; see
+            // https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case
+            // for details.
+            prod = ((subsegment * static_cast<uint64_t>(450359963)) >> 20) + 1;
+            digits = static_cast<uint32_t>(prod >> 32);
+            copy2(buffer, digits2(digits));
+            number_of_digits_printed += 2;
+          }
+
+          // Print all digit pairs.
+          while (number_of_digits_printed < number_of_digits_to_print) {
+            prod = static_cast<uint32_t>(prod) * static_cast<uint64_t>(100);
+            digits = static_cast<uint32_t>(prod >> 32);
+            copy2(buffer + number_of_digits_printed, digits2(digits));
+            number_of_digits_printed += 2;
+          }
+        };
+
+        // Print first subsegment.
+        print_subsegment(first_subsegment, buf.data());
+
+        // Perform rounding if the first subsegment is the last subsegment to
+        // print.
+        if (precision <= 9) {
+          // Rounding inside the subsegment.
+          // We round-up if:
+          // - either the fractional part is strictly larger than 1/2, or
+          // - the fractional part is exactly 1/2 and the last digit is odd.
+          // We rely on the following observations:
+          // - If fractional_part >= threshold, then the fractional part is
+          //   strictly larger than 1/2.
+          // - If the MSB of fractional_part is set, then the fractional part
+          //   must be at least 1/2.
+          // - When the MSB of fractional_part is set, either
+          //   second_third_subsegments being nonzero or has_more_segments
+          //   being true means there are further digits not printed, so the
+          //   fractional part is strictly larger than 1/2.
+          if (precision < 9) {
+            uint32_t fractional_part = static_cast<uint32_t>(prod);
+            should_round_up = fractional_part >=
+                                  data::fractional_part_rounding_thresholds
+                                      [8 - number_of_digits_to_print] ||
+                              ((fractional_part >> 31) &
+                               ((digits & 1) | (second_third_subsegments != 0) |
+                                has_more_segments)) != 0;
+          }
+          // Rounding at the subsegment boundary.
+          // In this case, the fractional part is at least 1/2 if and only if
+          // second_third_subsegments >= 5000000000ULL, and is strictly larger
+          // than 1/2 if we further have either second_third_subsegments >
+          // 5000000000ULL or has_more_segments == true.
+          else {
+            should_round_up = second_third_subsegments > 5000000000ULL ||
+                              (second_third_subsegments == 5000000000ULL &&
+                               ((digits & 1) != 0 || has_more_segments));
+          }
+        }
+        // Otherwise, print the second subsegment.
+        else {
+          // Compilers are not aware of how to leverage the maximum value of
+          // second_third_subsegments to find out a better magic number which
+          // allows us to eliminate an additional shift. 1844674407370955162 =
+          // ceil(2^64/10) < ceil(2^64*(10^9/(10^10 - 1))).
+          const uint32_t second_subsegment =
+              static_cast<uint32_t>(dragonbox::umul128_upper64(
+                  second_third_subsegments, 1844674407370955162ULL));
+          const uint32_t third_subsegment =
+              static_cast<uint32_t>(second_third_subsegments) -
+              second_subsegment * 10;
+
+          number_of_digits_to_print = precision - 9;
+          print_subsegment(second_subsegment, buf.data() + 9);
+
+          // Rounding inside the subsegment.
+          if (precision < 18) {
+            // The condition third_subsegment != 0 implies that the segment was
+            // of 19 digits, so in this case the third segment should be
+            // consisting of a genuine digit from the input.
+            uint32_t fractional_part = static_cast<uint32_t>(prod);
+            should_round_up = fractional_part >=
+                                  data::fractional_part_rounding_thresholds
+                                      [8 - number_of_digits_to_print] ||
+                              ((fractional_part >> 31) &
+                               ((digits & 1) | (third_subsegment != 0) |
+                                has_more_segments)) != 0;
+          }
+          // Rounding at the subsegment boundary.
+          else {
+            // In this case, the segment must be of 19 digits, thus
+            // the third subsegment should be consisting of a genuine digit from
+            // the input.
+            should_round_up = third_subsegment > 5 ||
+                              (third_subsegment == 5 &&
+                               ((digits & 1) != 0 || has_more_segments));
+          }
+        }
+
+        // Round-up if necessary.
+        if (should_round_up) {
+          ++buf[precision - 1];
+          for (int i = precision - 1; i > 0 && buf[i] > '9'; --i) {
+            buf[i] = '0';
+            ++buf[i - 1];
+          }
+          if (buf[0] > '9') {
+            buf[0] = '1';
+            if (fixed)
+              buf[precision++] = '0';
+            else
+              ++exp;
+          }
+        }
+        buf.try_resize(to_unsigned(precision));
+      }
+    }  // if (digits_in_the_first_segment > precision)
+    else {
+      // Adjust the exponent for its use in Dragon4.
+      exp += digits_in_the_first_segment - 1;
+    }
+  }
+  if (use_dragon) {
+    auto f = basic_fp<uint128_t>();
+    bool is_predecessor_closer = specs.binary32
+                                     ? f.assign(static_cast<float>(value))
+                                     : f.assign(converted_value);
+    if (is_predecessor_closer) dragon_flags |= dragon::predecessor_closer;
+    if (fixed) dragon_flags |= dragon::fixed;
+    // Limit precision to the maximum possible number of significant digits in
+    // an IEEE754 double because we don't need to generate zeros.
+    const int max_double_digits = 767;
+    if (precision > max_double_digits) precision = max_double_digits;
+    format_dragon(f, dragon_flags, precision, buf, exp);
+  }
+  if (!fixed && !specs.showpoint) {
+    // Remove trailing zeros.
+    auto num_digits = buf.size();
+    while (num_digits > 0 && buf[num_digits - 1] == '0') {
+      --num_digits;
+      ++exp;
+    }
+    buf.try_resize(num_digits);
+  }
+  return exp;
+}
+template <typename Char, typename OutputIt, typename T>
+FMT_CONSTEXPR20 auto write_float(OutputIt out, T value,
+                                 format_specs<Char> specs, locale_ref loc)
+    -> OutputIt {
+  float_specs fspecs = parse_float_type_spec(specs);
+  fspecs.sign = specs.sign;
+  if (detail::signbit(value)) {  // value < 0 is false for NaN so use signbit.
+    fspecs.sign = sign::minus;
+    value = -value;
+  } else if (fspecs.sign == sign::minus) {
+    fspecs.sign = sign::none;
+  }
+
+  if (!detail::isfinite(value))
+    return write_nonfinite(out, detail::isnan(value), specs, fspecs);
+
+  if (specs.align == align::numeric && fspecs.sign) {
+    auto it = reserve(out, 1);
+    *it++ = detail::sign<Char>(fspecs.sign);
+    out = base_iterator(out, it);
+    fspecs.sign = sign::none;
+    if (specs.width != 0) --specs.width;
+  }
+
+  memory_buffer buffer;
+  if (fspecs.format == float_format::hex) {
+    if (fspecs.sign) buffer.push_back(detail::sign<char>(fspecs.sign));
+    format_hexfloat(convert_float(value), specs.precision, fspecs, buffer);
+    return write_bytes<align::right>(out, {buffer.data(), buffer.size()},
+                                     specs);
+  }
+  int precision = specs.precision >= 0 || specs.type == presentation_type::none
+                      ? specs.precision
+                      : 6;
+  if (fspecs.format == float_format::exp) {
+    if (precision == max_value<int>())
+      throw_format_error("number is too big");
+    else
+      ++precision;
+  } else if (fspecs.format != float_format::fixed && precision == 0) {
+    precision = 1;
+  }
+  if (const_check(std::is_same<T, float>())) fspecs.binary32 = true;
+  int exp = format_float(convert_float(value), precision, fspecs, buffer);
+  fspecs.precision = precision;
+  auto f = big_decimal_fp{buffer.data(), static_cast<int>(buffer.size()), exp};
+  return write_float(out, f, specs, fspecs, loc);
+}
+
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(is_floating_point<T>::value)>
+FMT_CONSTEXPR20 auto write(OutputIt out, T value, format_specs<Char> specs,
+                           locale_ref loc = {}) -> OutputIt {
+  if (const_check(!is_supported_floating_point(value))) return out;
+  return specs.localized && write_loc(out, value, specs, loc)
+             ? out
+             : write_float(out, value, specs, loc);
+}
+
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(is_fast_float<T>::value)>
+FMT_CONSTEXPR20 auto write(OutputIt out, T value) -> OutputIt {
+  if (is_constant_evaluated()) return write(out, value, format_specs<Char>());
+  if (const_check(!is_supported_floating_point(value))) return out;
+
+  auto fspecs = float_specs();
+  if (detail::signbit(value)) {
+    fspecs.sign = sign::minus;
+    value = -value;
+  }
+
+  constexpr auto specs = format_specs<Char>();
+  using floaty = conditional_t<std::is_same<T, long double>::value, double, T>;
+  using floaty_uint = typename dragonbox::float_info<floaty>::carrier_uint;
+  floaty_uint mask = exponent_mask<floaty>();
+  if ((bit_cast<floaty_uint>(value) & mask) == mask)
+    return write_nonfinite(out, std::isnan(value), specs, fspecs);
+
+  auto dec = dragonbox::to_decimal(static_cast<floaty>(value));
+  return write_float(out, dec, specs, fspecs, {});
+}
+
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(is_floating_point<T>::value &&
+                        !is_fast_float<T>::value)>
+inline auto write(OutputIt out, T value) -> OutputIt {
+  return write(out, value, format_specs<Char>());
+}
+
+template <typename Char, typename OutputIt>
+auto write(OutputIt out, monostate, format_specs<Char> = {}, locale_ref = {})
+    -> OutputIt {
+  FMT_ASSERT(false, "");
+  return out;
+}
+
+template <typename Char, typename OutputIt>
+FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> value)
+    -> OutputIt {
+  auto it = reserve(out, value.size());
+  it = copy_str_noinline<Char>(value.begin(), value.end(), it);
+  return base_iterator(out, it);
+}
+
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(has_to_string_view<T>::value)>
+constexpr auto write(OutputIt out, const T& value) -> OutputIt {
+  return write<Char>(out, to_string_view(value));
+}
+
+// FMT_ENABLE_IF() condition separated to workaround an MSVC bug.
+template <
+    typename Char, typename OutputIt, typename T,
+    bool check =
+        std::is_enum<T>::value && !std::is_same<T, Char>::value &&
+        mapped_type_constant<T, basic_format_context<OutputIt, Char>>::value !=
+            type::custom_type,
+    FMT_ENABLE_IF(check)>
+FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt {
+  return write<Char>(out, static_cast<underlying_t<T>>(value));
+}
+
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(std::is_same<T, bool>::value)>
+FMT_CONSTEXPR auto write(OutputIt out, T value,
+                         const format_specs<Char>& specs = {}, locale_ref = {})
+    -> OutputIt {
+  return specs.type != presentation_type::none &&
+                 specs.type != presentation_type::string
+             ? write(out, value ? 1 : 0, specs, {})
+             : write_bytes(out, value ? "true" : "false", specs);
+}
+
+template <typename Char, typename OutputIt>
+FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt {
+  auto it = reserve(out, 1);
+  *it++ = value;
+  return base_iterator(out, it);
+}
+
+template <typename Char, typename OutputIt>
+FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value)
+    -> OutputIt {
+  if (value) return write(out, basic_string_view<Char>(value));
+  throw_format_error("string pointer is null");
+  return out;
+}
+
+template <typename Char, typename OutputIt, typename T,
+          FMT_ENABLE_IF(std::is_same<T, void>::value)>
+auto write(OutputIt out, const T* value, const format_specs<Char>& specs = {},
+           locale_ref = {}) -> OutputIt {
+  return write_ptr<Char>(out, bit_cast<uintptr_t>(value), &specs);
+}
+
+// A write overload that handles implicit conversions.
+template <typename Char, typename OutputIt, typename T,
+          typename Context = basic_format_context<OutputIt, Char>>
+FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> enable_if_t<
+    std::is_class<T>::value && !is_string<T>::value &&
+        !is_floating_point<T>::value && !std::is_same<T, Char>::value &&
+        !std::is_same<T, remove_cvref_t<decltype(arg_mapper<Context>().map(
+                              value))>>::value,
+    OutputIt> {
+  return write<Char>(out, arg_mapper<Context>().map(value));
+}
+
+template <typename Char, typename OutputIt, typename T,
+          typename Context = basic_format_context<OutputIt, Char>>
+FMT_CONSTEXPR auto write(OutputIt out, const T& value)
+    -> enable_if_t<mapped_type_constant<T, Context>::value == type::custom_type,
+                   OutputIt> {
+  auto ctx = Context(out, {}, {});
+  return typename Context::template formatter_type<T>().format(value, ctx);
+}
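+
+// NOTE (editorial sketch, not part of upstream {fmt}): the write() overload
+// set above is the non-parsing fast path. Assuming the detail API in this
+// header, formatting an int straight into a buffer looks like:
+//
+//   auto buf = fmt::memory_buffer();
+//   fmt::detail::write<char>(fmt::appender(buf), 42);  // buf holds "42"
+//
+// fmt::to_string() and compiled format strings dispatch through these
+// overloads directly, bypassing the format string parser.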
+
+// An argument visitor that formats the argument and writes it via the output
+// iterator. It's a class and not a generic lambda for compatibility with C++11.
+template <typename Char> struct default_arg_formatter {
+  using iterator = buffer_appender<Char>;
+  using context = buffer_context<Char>;
+
+  iterator out;
+  basic_format_args<context> args;
+  locale_ref loc;
+
+  template <typename T> auto operator()(T value) -> iterator {
+    return write<Char>(out, value);
+  }
+  auto operator()(typename basic_format_arg<context>::handle h) -> iterator {
+    basic_format_parse_context<Char> parse_ctx({});
+    context format_ctx(out, args, loc);
+    h.format(parse_ctx, format_ctx);
+    return format_ctx.out();
+  }
+};
+
+template <typename Char> struct arg_formatter {
+  using iterator = buffer_appender<Char>;
+  using context = buffer_context<Char>;
+
+  iterator out;
+  const format_specs<Char>& specs;
+  locale_ref locale;
+
+  template <typename T>
+  FMT_CONSTEXPR FMT_INLINE auto operator()(T value) -> iterator {
+    return detail::write(out, value, specs, locale);
+  }
+  auto operator()(typename basic_format_arg<context>::handle) -> iterator {
+    // User-defined types are handled separately because they require access
+    // to the parse context.
+    return out;
+  }
+};
+
+template <typename Char> struct custom_formatter {
+  basic_format_parse_context<Char>& parse_ctx;
+  buffer_context<Char>& ctx;
+
+  void operator()(
+      typename basic_format_arg<buffer_context<Char>>::handle h) const {
+    h.format(parse_ctx, ctx);
+  }
+  template <typename T> void operator()(T) const {}
+};
+
+template <typename ErrorHandler> class width_checker {
+ public:
+  explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {}
+
+  template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
+  FMT_CONSTEXPR auto operator()(T value) -> unsigned long long {
+    if (is_negative(value)) handler_.on_error("negative width");
+    return static_cast<unsigned long long>(value);
+  }
+
+  template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
+  FMT_CONSTEXPR auto operator()(T) -> unsigned long long {
+    handler_.on_error("width is not integer");
+    return 0;
+  }
+
+ private:
+  ErrorHandler& handler_;
+};
+
+template <typename ErrorHandler> class precision_checker {
+ public:
+  explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {}
+
+  template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
+  FMT_CONSTEXPR auto operator()(T value) -> unsigned long long {
+    if (is_negative(value)) handler_.on_error("negative precision");
+    return static_cast<unsigned long long>(value);
+  }
+
+  template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
+  FMT_CONSTEXPR auto operator()(T) -> unsigned long long {
+    handler_.on_error("precision is not integer");
+    return 0;
+  }
+
+ private:
+  ErrorHandler& handler_;
+};
+
+template